From f663a6dec51c7510ef10b2835983190fb84b0ebf Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 14:04:39 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Create a pluggable OS layer, add in-memory version so in-memory configurations don't create on-disk files. Notes: The WiredTiger library no longer uses FILE * objects, the file descriptor and stream functions now use the same open and close functions, and both have a WT_FH handle. The sync/async versions of __wt_fsync have been merged, there's now a boolean flag that indicates wait/no-wait on the flush. The transaction log's print-log function no longer supports unspecified handles, output is always to stdout (which the wt utility redirects into another file as needed). --- dist/filelist | 11 +- dist/flags.py | 2 +- dist/log.py | 18 +- dist/s_funcs.list | 1 - dist/s_stat | 2 +- dist/s_string.ok | 7 + src/block/block_mgr.c | 6 +- src/block/block_open.c | 10 +- src/block/block_read.c | 4 +- src/block/block_write.c | 2 +- src/btree/bt_debug.c | 17 +- src/btree/bt_huffman.c | 39 ++- src/btree/bt_sync.c | 2 +- src/conn/conn_api.c | 69 +++-- src/conn/conn_handle.c | 11 +- src/conn/conn_log.c | 4 +- src/conn/conn_open.c | 22 +- src/conn/conn_stat.c | 37 +-- src/cursor/cur_backup.c | 10 +- src/evict/evict_lru.c | 38 +-- src/include/connection.h | 32 +- src/include/cursor.h | 2 +- src/include/extern.h | 48 ++- src/include/flags.h | 2 +- src/include/os.h | 213 ++++++++++++-- src/include/wt_internal.h | 10 +- src/log/log.c | 30 +- src/log/log_auto.c | 117 ++++---- src/meta/meta_turtle.c | 30 +- src/os_posix/os_dir.c | 2 + src/os_posix/os_exist.c | 38 --- src/os_posix/os_fallocate.c | 2 + src/os_posix/os_filesize.c | 62 ---- src/os_posix/os_flock.c | 38 --- src/os_posix/os_fsync.c | 121 +------- src/os_posix/os_ftruncate.c | 26 -- src/os_posix/os_getline.c | 10 +- src/os_posix/os_init.c | 31 ++ src/os_posix/os_inmemory.c | 451 +++++++++++++++++++++++++++++ 
src/os_posix/os_map.c | 25 +- src/os_posix/os_open.c | 316 ++++++++++---------- src/os_posix/os_posix.c | 659 ++++++++++++++++++++++++++++++++++++++++++ src/os_posix/os_remove.c | 69 ----- src/os_posix/os_rename.c | 40 --- src/os_posix/os_rw.c | 90 ------ src/os_posix/os_stdio.c | 127 -------- src/session/session_compact.c | 3 +- src/support/err.c | 25 +- src/support/filename.c | 27 +- src/txn/txn_ckpt.c | 2 +- src/txn/txn_log.c | 65 ++--- src/utilities/util_printlog.c | 2 +- 52 files changed, 1884 insertions(+), 1143 deletions(-) delete mode 100644 src/os_posix/os_exist.c delete mode 100644 src/os_posix/os_filesize.c delete mode 100644 src/os_posix/os_flock.c delete mode 100644 src/os_posix/os_ftruncate.c create mode 100644 src/os_posix/os_init.c create mode 100644 src/os_posix/os_inmemory.c create mode 100644 src/os_posix/os_posix.c delete mode 100644 src/os_posix/os_remove.c delete mode 100644 src/os_posix/os_rename.c delete mode 100644 src/os_posix/os_rw.c delete mode 100644 src/os_posix/os_stdio.c diff --git a/dist/filelist b/dist/filelist index 4ed7d7e3beb..6c931f96a9f 100644 --- a/dist/filelist +++ b/dist/filelist @@ -106,15 +106,13 @@ src/os_posix/os_alloc.c src/os_posix/os_dir.c src/os_posix/os_dlopen.c src/os_posix/os_errno.c -src/os_posix/os_exist.c src/os_posix/os_fallocate.c -src/os_posix/os_filesize.c -src/os_posix/os_flock.c src/os_posix/os_fsync.c -src/os_posix/os_ftruncate.c src/os_posix/os_getenv.c src/os_posix/os_getline.c src/os_posix/os_getopt.c +src/os_posix/os_init.c +src/os_posix/os_inmemory.c src/os_posix/os_map.c src/os_posix/os_mtx_cond.c src/os_posix/os_mtx_rw.c @@ -122,12 +120,9 @@ src/os_posix/os_once.c src/os_posix/os_open.c src/os_posix/os_pagesize.c src/os_posix/os_path.c +src/os_posix/os_posix.c src/os_posix/os_priv.c -src/os_posix/os_remove.c -src/os_posix/os_rename.c -src/os_posix/os_rw.c src/os_posix/os_sleep.c -src/os_posix/os_stdio.c src/os_posix/os_strtouq.c src/os_posix/os_thread.c src/os_posix/os_time.c diff --git 
a/dist/flags.py b/dist/flags.py index f500e3b1ae1..3e9d8cd890c 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -13,7 +13,7 @@ flags = { 'FILE_TYPE_DATA', 'FILE_TYPE_DIRECTORY', 'FILE_TYPE_LOG', - 'FILE_TYPE_TURTLE', + 'FILE_TYPE_REGULAR', ], 'log_scan' : [ 'LOGSCAN_FIRST', diff --git a/dist/log.py b/dist/log.py index 6d35bf2e718..ad93a6dfb44 100644 --- a/dist/log.py +++ b/dist/log.py @@ -89,7 +89,7 @@ def printf_line(f, optype, i, ishex): ifbegin = 'if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {' + nl_indent if postcomma == '': precomma = ',\\n' - body = '%s%s(__wt_fprintf(out,' % ( + body = '%s%s(__wt_fprintf(session, WT_STDOUT,' % ( printf_setup(f, ishex, nl_indent), 'WT_ERR' if has_escape(optype.fields) else 'WT_RET') + \ '%s "%s \\"%s\\": \\"%s\\"%s",%s));' % ( @@ -292,16 +292,16 @@ __wt_logop_%(name)s_unpack( last_field = optype.fields[-1] tfile.write(''' int -__wt_logop_%(name)s_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_%(name)s_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { %(arg_ret)s\t%(arg_decls)s \t%(arg_unused)s%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack( \t session, pp, end%(arg_addrs)s)); -\tWT_RET(__wt_fprintf(out, " \\"optype\\": \\"%(name)s\\",\\n")); +\tWT_RET(__wt_fprintf(session, WT_STDOUT, +\t " \\"optype\\": \\"%(name)s\\",\\n")); \t%(print_args)s %(arg_fini)s } @@ -324,9 +324,8 @@ __wt_logop_%(name)s_print( # Emit the printlog entry point tfile.write(''' int -__wt_txn_op_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_txn_op_printlog(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { \tuint32_t optype, opsize; @@ -342,8 +341,7 @@ for optype in log_data.optypes: tfile.write(''' \tcase %(macro)s: -\t\tWT_RET(%(print_func)s(session, pp, end, out, -\t\t flags)); +\t\tWT_RET(%(print_func)s(session, pp, end, flags)); \t\tbreak; ''' % { 
'macro' : optype.macro_name(), diff --git a/dist/s_funcs.list b/dist/s_funcs.list index 8d32eecdfb7..d13f45ab4da 100644 --- a/dist/s_funcs.list +++ b/dist/s_funcs.list @@ -22,7 +22,6 @@ __wt_debug_set_verbose __wt_debug_tree __wt_debug_tree_all __wt_debug_tree_shape -__wt_fsync __wt_lex_compare __wt_lex_compare_skip __wt_log_scan diff --git a/dist/s_stat b/dist/s_stat index 44c22ab56bb..3938b8e65eb 100755 --- a/dist/s_stat +++ b/dist/s_stat @@ -11,7 +11,7 @@ l=`sed \ -e 's,#.*,,' \ -e '/^$/d' \ -e 's,^,../,' filelist` -l="$l `echo ../src/include/*.i`" +l="$l `echo ../src/include/*.i ../src/include/os.h`" ( # Get the list of statistics fields. diff --git a/dist/s_string.ok b/dist/s_string.ok index 2caaddcc15a..21c9b2c3751 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -595,9 +595,11 @@ fallocate fblocks fclose fcntl +fdatasync ffc fflush ffs +fgetc fgetln fh filefrag @@ -631,6 +633,7 @@ func gcc gdb ge +getc getenv getline getone @@ -663,6 +666,7 @@ icount idx ifdef's ikey +im impl incase incr @@ -679,6 +683,7 @@ initsize initval inline inmem +inmemory insertK insertV inserters @@ -856,6 +861,7 @@ postsize powerpc pragmas pre +pread prealloc preload prepend @@ -875,6 +881,7 @@ pushms putK putV pv +pwrite py qdown qrrSS diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index 0bb75d129e1..7db552b307c 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -411,11 +411,9 @@ __bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats) * Flush a file to disk. */ static int -__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool async) +__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool wait) { - return (async ? 
- __wt_fsync_async(session, bm->block->fh) : - __wt_fsync(session, bm->block->fh)); + return (__wt_fsync(session, bm->block->fh, wait)); } /* diff --git a/src/block/block_open.c b/src/block/block_open.c index d9b2f908737..3533a1f4204 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -44,8 +44,8 @@ __wt_block_manager_create( * in our space. Move any existing files out of the way and complain. */ for (;;) { - if ((ret = __wt_open(session, - filename, true, true, WT_FILE_TYPE_DATA, &fh)) == 0) + if ((ret = __wt_open(session, filename, WT_FILE_TYPE_DATA, + WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0) break; WT_ERR_TEST(ret != EEXIST, ret); @@ -73,7 +73,7 @@ __wt_block_manager_create( * Ensure the truncated file has made it to disk, then the upper-level * is never surprised. */ - WT_TRET(__wt_fsync(session, fh)); + WT_TRET(__wt_fsync(session, fh, true)); /* Close the file handle. */ WT_TRET(__wt_close(session, &fh)); @@ -226,9 +226,9 @@ __wt_block_open(WT_SESSION_IMPL *session, #endif /* Open the underlying file handle. */ - WT_ERR(__wt_open(session, filename, false, false, + WT_ERR(__wt_open(session, filename, readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA, - &block->fh)); + 0, &block->fh)); /* Initialize the live checkpoint's lock. */ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager")); diff --git a/src/block/block_read.c b/src/block/block_read.c index 6e74d7a7793..9386974238d 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -45,7 +45,7 @@ __wt_bm_preload( session, (uint8_t *)bm->map + offset, size)); #ifdef HAVE_POSIX_FADVISE - if (posix_fadvise(block->fh->fd, + if (__wt_posix_fadvise(session, block->fh, (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED) == 0) return (0); #endif @@ -108,7 +108,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, block->os_cache = 0; /* Ignore EINVAL - some file systems don't support the flag. 
*/ - if ((ret = posix_fadvise(block->fh->fd, + if ((ret = __wt_posix_fadvise(session, block->fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)) != 0 && ret != EINVAL) WT_RET_MSG( diff --git a/src/block/block_write.c b/src/block/block_write.c index 4c6ac198fe4..5103a573c11 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -329,7 +329,7 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, if (block->os_cache_max != 0 && (block->os_cache += align_size) > block->os_cache_max) { block->os_cache = 0; - if ((ret = posix_fadvise(fh->fd, + if ((ret = __wt_posix_fadvise(session, fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)) != 0) WT_RET_MSG( session, ret, "%s: posix_fadvise", block->name); diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 7c7f8cab855..5c91e89e033 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -19,7 +19,7 @@ typedef struct { * When using the standard event handlers, the debugging output has to * do its own message handling because its output isn't line-oriented. */ - FILE *fp; /* Output file stream */ + WT_FH *fh; /* Output file stream */ WT_ITEM *msg; /* Buffered message */ WT_ITEM *tmp; /* Temporary space */ @@ -97,11 +97,8 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile) if (ofile == NULL) return (__wt_scr_alloc(session, 512, &ds->msg)); - /* If we're using a file, flush on each line. */ - WT_RET(__wt_fopen(session, ofile, WT_FHANDLE_WRITE, 0, &ds->fp)); - - (void)setvbuf(ds->fp, NULL, _IOLBF, 0); - return (0); + return (__wt_open(session, ofile, + WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_STREAM_WRITE, &ds->fh)); } /* @@ -130,7 +127,7 @@ __dmsg_wrapup(WT_DBG *ds) } /* Close any file we opened. */ - (void)__wt_fclose(&ds->fp, WT_FHANDLE_WRITE); + (void)__wt_close(session, &ds->fh); } /* @@ -155,7 +152,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...) * the output chunk, and pass it to the event handler once we see a * terminating newline. 
*/ - if (ds->fp == NULL) { + if (ds->fh == NULL) { msg = ds->msg; for (;;) { p = (char *)msg->mem + msg->size; @@ -187,7 +184,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...) } } else { va_start(ap, fmt); - (void)__wt_vfprintf(ds->fp, fmt, ap); + (void)__wt_vfprintf(session, ds->fh, fmt, ap); va_end(ap); } } @@ -204,7 +201,7 @@ __wt_debug_addr_print( WT_DECL_RET; WT_RET(__wt_scr_alloc(session, 128, &buf)); - ret = __wt_fprintf(stderr, + ret = __wt_fprintf(session, WT_STDERR, "%s\n", __wt_addr_string(session, addr, addr_size, buf)); __wt_scr_free(session, &buf); diff --git a/src/btree/bt_huffman.c b/src/btree/bt_huffman.c index 2c0238545fb..9d83968910c 100644 --- a/src/btree/bt_huffman.c +++ b/src/btree/bt_huffman.c @@ -134,9 +134,9 @@ static int __wt_huffman_read(WT_SESSION_IMPL *, */ static int __huffman_confchk_file( - WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, FILE **fpp) + WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FH **fhp) { - FILE *fp; + WT_FH *fh; WT_DECL_RET; size_t len; char *fname; @@ -157,14 +157,14 @@ __huffman_confchk_file( /* Check the file exists. */ WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname)); - WT_ERR(__wt_fopen(session, - fname, WT_FHANDLE_READ, WT_FOPEN_FIXED, &fp)); + WT_ERR(__wt_open(session, fname, + WT_FILE_TYPE_REGULAR, WT_OPEN_FIXED | WT_STREAM_READ, &fh)); /* Optionally return the file handle. 
*/ - if (fpp == NULL) - (void)__wt_fclose(&fp, WT_FHANDLE_READ); + if (fhp == NULL) + (void)__wt_close(session, &fh); else - *fpp = fp; + *fhp = fh; err: __wt_free(session, fname); @@ -298,22 +298,24 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, struct __wt_huffman_table **tablep, u_int *entriesp, u_int *numbytesp) { struct __wt_huffman_table *table, *tp; - FILE *fp; + WT_DECL_ITEM(tmp); WT_DECL_RET; + WT_FH *fh; int64_t symbol, frequency; u_int entries, lineno; + int n; bool is_utf8; *tablep = NULL; *entriesp = *numbytesp = 0; - fp = NULL; + fh = NULL; table = NULL; /* * Try and open the backing file. */ - WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fp)); + WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fh)); /* * UTF-8 table is 256 bytes, with a range of 0-255. @@ -329,9 +331,13 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, WT_ERR(__wt_calloc_def(session, entries, &table)); } - for (tp = table, lineno = 1; (ret = - fscanf(fp, "%" SCNi64 " %" SCNi64, &symbol, &frequency)) != EOF; - ++tp, ++lineno) { + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + for (tp = table, lineno = 1;; ++tp, ++lineno) { + WT_ERR(__wt_getline(session, tmp, fh)); + if (tmp->size == 0) + break; + n = sscanf( + tmp->data, "%" SCNi64 " %" SCNi64, &symbol, &frequency); /* * Entries is 0-based, that is, there are (entries +1) possible * values that can be configured. The line number is 1-based, so @@ -343,7 +349,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, "Huffman table file %.*s is corrupted, " "more than %" PRIu32 " entries", (int)ip->len, ip->str, entries + 1); - if (ret != 2) + if (n != 2) WT_ERR_MSG(session, EINVAL, "line %u of Huffman table file %.*s is corrupted: " "expected two unsigned integral values", @@ -365,7 +371,6 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, tp->symbol = (uint32_t)symbol; tp->frequency = (uint32_t)frequency; } - ret = ferror(fp) ? 
WT_ERROR : 0; *entriesp = lineno - 1; *tablep = table; @@ -373,7 +378,9 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, if (0) { err: __wt_free(session, table); } - (void)__wt_fclose(&fp, WT_FHANDLE_READ); + (void)__wt_close(session, &fh); + + __wt_scr_free(session, &tmp); return (ret); } diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 5273f0ee2c3..6c39bd1042c 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -258,7 +258,7 @@ err: /* On error, clear any left-over tree walk. */ * but don't wait for it. */ if (ret == 0 && syncop == WT_SYNC_WRITE_LEAVES) - WT_RET(btree->bm->sync(btree->bm, session, true)); + WT_RET(btree->bm->sync(btree->bm, session, false)); return (ret); } diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index bb67185f5c9..d6e24bf14cb 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1183,7 +1183,7 @@ __conn_config_file(WT_SESSION_IMPL *session, return (0); /* Open the configuration file. */ - WT_RET(__wt_open(session, filename, false, false, 0, &fh)); + WT_RET(__wt_open(session, filename, 0, 0, &fh)); WT_ERR(__wt_filesize(session, fh, &size)); if (size == 0) goto err; @@ -1475,8 +1475,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) exist = false; if (!is_create) WT_ERR(__wt_exist(session, WT_WIREDTIGER, &exist)); - ret = __wt_open(session, - WT_SINGLETHREAD, is_create || exist, false, 0, &conn->lock_fh); + ret = __wt_open(session, WT_SINGLETHREAD, 0, + is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh); /* * If this is a read-only connection and we cannot grab the lock @@ -1504,7 +1504,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) * zero-length, and that's OK, the underlying call supports * locking past the end-of-file. 
*/ - if (__wt_bytelock(conn->lock_fh, (wt_off_t)0, true) != 0) + if (__wt_file_lock(session, conn->lock_fh, true) != 0) WT_ERR_MSG(session, EBUSY, "WiredTiger database is already being managed by " "another process"); @@ -1530,7 +1530,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) } /* We own the lock file, optionally create the WiredTiger file. */ - ret = __wt_open(session, WT_WIREDTIGER, is_create, false, 0, &fh); + ret = __wt_open( + session, WT_WIREDTIGER, 0, is_create ? WT_OPEN_CREATE : 0, &fh); /* * If we're read-only, check for success as well as handled errors. @@ -1551,12 +1552,12 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) * as described above). Immediately release the lock, it's * just a test. */ - if (__wt_bytelock(fh, (wt_off_t)0, true) != 0) { + if (__wt_file_lock(session, fh, true) != 0) { WT_ERR_MSG(session, EBUSY, "WiredTiger database is already being managed by " "another process"); } - WT_ERR(__wt_bytelock(fh, (wt_off_t)0, false)); + WT_ERR(__wt_file_lock(session, fh, false)); } /* @@ -1577,7 +1578,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) len = (size_t)snprintf(buf, sizeof(buf), "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING); WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf)); - WT_ERR(__wt_fsync(session, fh)); + WT_ERR(__wt_fsync(session, fh, true)); } else { /* * Although exclusive and the read-only configuration settings @@ -1736,14 +1737,14 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) static int __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) { - FILE *fp; + WT_FH *fh; WT_CONFIG parser; WT_CONFIG_ITEM cval, k, v; WT_DECL_RET; bool exist; const char *base_config; - fp = NULL; + fh = NULL; base_config = NULL; /* @@ -1775,10 +1776,11 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) if (exist) return (0); - WT_RET(__wt_fopen(session, - WT_BASECONFIG_SET, WT_FHANDLE_WRITE, 0, &fp)); + WT_RET(__wt_open(session, + 
WT_BASECONFIG_SET, WT_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE | WT_STREAM_WRITE, &fh)); - WT_ERR(__wt_fprintf(fp, "%s\n\n", + WT_ERR(__wt_fprintf(session, fh, "%s\n\n", "# Do not modify this file.\n" "#\n" "# WiredTiger created this file when the database was created,\n" @@ -1825,18 +1827,18 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) --v.str; v.len += 2; } - WT_ERR(__wt_fprintf(fp, + WT_ERR(__wt_fprintf(session, fh, "%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str)); } WT_ERR_NOTFOUND_OK(ret); /* Flush the handle and rename the file into place. */ - ret = __wt_sync_fp_and_rename( - session, &fp, WT_BASECONFIG_SET, WT_BASECONFIG); + ret = __wt_sync_handle_and_rename( + session, &fh, WT_BASECONFIG_SET, WT_BASECONFIG); if (0) { /* Close open file handle, remove any temporary file. */ -err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_WRITE)); +err: WT_TRET(__wt_close(session, &fh)); WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET)); } @@ -1941,21 +1943,34 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, session, cval.str, cval.len, &conn->error_prefix)); /* - * We need to look for read-only early so that we can use it - * in __conn_single and whether to use the base config file. - * XXX that means we can only make the choice in __conn_single if the - * user passes it in via the config string to wiredtiger_open. + * Look for read-only early (for example, it configures use of the base + * config file). + * + * XXX + * We haven't read the WIREDTIGER_CONFIG environment variable, we need + * to fix that. */ WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval)); if (cval.val) F_SET(conn, WT_CONN_READONLY); /* - * XXX ideally, we would check "in_memory" here, so we could completely - * avoid having a database directory. However, it can be convenient to - * pass "in_memory" via the WIREDTIGER_CONFIG environment variable, and - * we haven't read it yet. 
+ * Look for in-memory early (for example, it configures writing the base + * config file). + * + * XXX + * We haven't read the WIREDTIGER_CONFIG environment variable, we need + * to fix that. + */ + WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval)); + if (cval.val != 0) + F_SET(conn, WT_CONN_IN_MEMORY); + + /* + * After checking readonly and in-memory, but before we do anything + * that touches an underlying filesystem, configure the OS layer. */ + WT_ERR(__wt_os_init(session)); /* Get the database home. */ WT_ERR(__conn_home(session, home, cfg)); @@ -2057,10 +2072,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_config_gets(session, cfg, "session_scratch_max", &cval)); conn->session_scratch_max = (size_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval)); - if (cval.val != 0) - F_SET(conn, WT_CONN_IN_MEMORY); - WT_ERR(__wt_config_gets(session, cfg, "checkpoint_sync", &cval)); if (cval.val) F_SET(conn, WT_CONN_CKPT_SYNC); diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 16717597f4d..eb89949f79e 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -119,14 +119,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) session = conn->default_session; - /* - * Close remaining open files (before discarding the mutex, the - * underlying file-close code uses the mutex to guard lists of - * open files. - */ - if (conn->lock_fh) - WT_TRET(__wt_close(session, &conn->lock_fh)); - /* Remove from the list of connections. */ __wt_spin_lock(session, &__wt_process.spinlock); TAILQ_REMOVE(&__wt_process.connqh, conn, q); @@ -160,6 +152,9 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_free(session, conn->error_prefix); __wt_free(session, conn->sessions); + /* Destroy the OS configuration. 
*/ + WT_TRET(__wt_os_cleanup(session)); + __wt_free(NULL, conn); return (ret); } diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 5999cf20b3b..ab3a9507958 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -397,7 +397,7 @@ __log_file_server(void *arg) * to move the sync_lsn into the next file for * later syncs. */ - WT_ERR(__wt_fsync(session, close_fh)); + WT_ERR(__wt_fsync(session, close_fh, true)); /* * We want to make sure the file size reflects * actual data and has minimal pre-allocated @@ -449,7 +449,7 @@ __log_file_server(void *arg) log->bg_sync_lsn.l.file) || (log->sync_lsn.l.file < min_lsn.l.file)) continue; - WT_ERR(__wt_fsync(session, log->log_fh)); + WT_ERR(__wt_fsync(session, log->log_fh, true)); __wt_spin_lock(session, &log->log_sync_lock); locked = true; /* diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 58577b4587d..f682ab1ce77 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -76,7 +76,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_CONNECTION *wt_conn; WT_DECL_RET; WT_DLH *dlh; - WT_FH *fh; WT_SESSION_IMPL *s, *session; WT_TXN_GLOBAL *txn_global; u_int i; @@ -150,20 +149,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_TRET(__wt_conn_remove_encryptor(session)); WT_TRET(__wt_conn_remove_extractor(session)); - /* - * Complain if files weren't closed, ignoring the lock file, we'll - * close it in a minute. - */ - TAILQ_FOREACH(fh, &conn->fhqh, q) { - if (fh == conn->lock_fh) - continue; - - __wt_errx(session, - "Connection has open file handles: %s", fh->name); - WT_TRET(__wt_close(session, &fh)); - fh = TAILQ_FIRST(&conn->fhqh); - } - /* Disconnect from shared cache - must be before cache destroy. */ WT_TRET(__wt_conn_cache_pool_destroy(session)); @@ -182,6 +167,13 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_TRET(__wt_dlclose(session, dlh)); } + /* Close the lock file, opening up the database to other connections. 
*/ + if (conn->lock_fh != NULL) + WT_TRET(__wt_close(session, &conn->lock_fh)); + + /* Close any file handles left open. */ + WT_TRET(__wt_close_connection_close(session)); + /* * Close the internal (default) session, and switch back to the dummy * session in case of any error messages from the remaining operations diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index d6e59a50da5..fccc4786402 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -209,10 +209,11 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats) } if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) { - WT_ERR(__wt_fprintf(conn->stat_fp, + WT_ERR(__wt_fprintf(session, conn->stat_fh, "{\"version\":\"%s\",\"localTime\":\"%s\"", WIREDTIGER_VERSION_STRING, conn->stat_stamp)); - WT_ERR(__wt_fprintf(conn->stat_fp, ",\"wiredTiger\":{")); + WT_ERR(__wt_fprintf( + session, conn->stat_fh, ",\"wiredTiger\":{")); while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val)); /* Check if we are starting a new section. */ @@ -224,23 +225,23 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats) strncmp(desc, tmp->data, tmp->size) != 0) { WT_ERR(__wt_buf_set( session, tmp, desc, prefixlen)); - WT_ERR(__wt_fprintf(conn->stat_fp, + WT_ERR(__wt_fprintf(session, conn->stat_fh, "%s\"%.*s\":{", first ? "" : "},", (int)prefixlen, desc)); first = false; groupfirst = true; } - WT_ERR(__wt_fprintf(conn->stat_fp, + WT_ERR(__wt_fprintf(session, conn->stat_fh, "%s\"%s\":%" PRId64, groupfirst ? 
"" : ",", endprefix + 2, val)); groupfirst = false; } WT_ERR_NOTFOUND_OK(ret); - WT_ERR(__wt_fprintf(conn->stat_fp, "}}}\n")); + WT_ERR(__wt_fprintf(session, conn->stat_fh, "}}}\n")); } else { while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val)); - WT_ERR(__wt_fprintf(conn->stat_fp, + WT_ERR(__wt_fprintf(session, conn->stat_fh, "%s %" PRId64 " %s %s\n", conn->stat_stamp, val, name, desc)); } @@ -349,11 +350,11 @@ err: if (locked) static int __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) { - FILE *log_file; - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; struct timespec ts; struct tm *tm, _tm; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_FH *log_file; conn = S2C(session); @@ -366,16 +367,18 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) WT_RET_MSG(session, ENOMEM, "strftime path conversion"); /* If the path has changed, cycle the log file. */ - if ((log_file = conn->stat_fp) == NULL || + if ((log_file = conn->stat_fh) == NULL || path == NULL || strcmp(tmp->mem, path->mem) != 0) { - conn->stat_fp = NULL; - WT_RET(__wt_fclose(&log_file, WT_FHANDLE_APPEND)); + conn->stat_fh = NULL; + WT_RET(__wt_close(session, &log_file)); if (path != NULL) (void)strcpy(path->mem, tmp->mem); - WT_RET(__wt_fopen(session, - tmp->mem, WT_FHANDLE_APPEND, WT_FOPEN_FIXED, &log_file)); + WT_RET(__wt_open(session, tmp->mem, + WT_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_OPEN_FIXED | WT_STREAM_APPEND, + &log_file)); } - conn->stat_fp = log_file; + conn->stat_fh = log_file; /* Create the entry prefix for this time of day. */ if (strftime(tmp->mem, tmp->memsize, conn->stat_format, tm) == 0) @@ -408,7 +411,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) WT_RET(__statlog_lsm_apply(session)); /* Flush. 
*/ - return (__wt_fflush(conn->stat_fp)); + return (__wt_fsync(session, conn->stat_fh, true)); } /* @@ -594,7 +597,7 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) conn->stat_session = NULL; conn->stat_tid_set = false; conn->stat_format = NULL; - WT_TRET(__wt_fclose(&conn->stat_fp, WT_FHANDLE_APPEND)); + WT_TRET(__wt_close(session, &conn->stat_fh)); conn->stat_path = NULL; conn->stat_sources = NULL; conn->stat_stamp = NULL; diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index b097a8c08aa..46415291b88 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -250,7 +250,7 @@ __backup_start( * Close any hot backup file. * We're about to open the incremental backup file. */ - WT_TRET(__wt_fclose(&cb->bfp, WT_FHANDLE_WRITE)); + WT_TRET(__wt_close(session, &cb->bfh)); WT_ERR(__backup_file_create(session, cb, log_only)); WT_ERR(__backup_list_append( session, cb, WT_INCREMENTAL_BACKUP)); @@ -268,7 +268,7 @@ __backup_start( } err: /* Close the hot backup file. */ - WT_TRET(__wt_fclose(&cb->bfp, WT_FHANDLE_WRITE)); + WT_TRET(__wt_close(session, &cb->bfh)); if (ret != 0) { WT_TRET(__backup_cleanup_handles(session, cb)); WT_TRET(__backup_stop(session)); @@ -410,9 +410,9 @@ static int __backup_file_create( WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool incremental) { - return (__wt_fopen(session, + return (__wt_open(session, incremental ? WT_INCREMENTAL_BACKUP : WT_METADATA_BACKUP, - WT_FHANDLE_WRITE, 0, &cb->bfp)); + WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_STREAM_WRITE, &cb->bfh)); } /* @@ -471,7 +471,7 @@ __backup_list_uri_append( /* Add the metadata entry to the backup file. */ WT_RET(__wt_metadata_search(session, name, &value)); - WT_RET(__wt_fprintf(cb->bfp, "%s\n%s\n", name, value)); + WT_RET(__wt_fprintf(session, cb->bfh, "%s\n%s\n", name, value)); __wt_free(session, value); /* Add file type objects to the list of files to be copied. 
*/ diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 3b9a7c9db62..281b9e3fcd4 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1674,9 +1674,9 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session) int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) { - FILE *fp; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle, *saved_dhandle; + WT_FH *fh; WT_PAGE *page; WT_REF *next_walk; uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages; @@ -1688,12 +1688,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) total_bytes = 0; if (ofile == NULL) - fp = stderr; + fh = WT_STDERR; else - WT_RET(__wt_fopen(session, ofile, WT_FHANDLE_WRITE, 0, &fp)); + WT_RET(__wt_open(session, ofile, WT_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_STREAM_WRITE, &fh)); /* Note: odd string concatenation avoids spelling errors. */ - (void)__wt_fprintf(fp, "==========\n" "cache dump\n"); + (void)__wt_fprintf(session, fh, "==========\n" "cache dump\n"); saved_dhandle = session->dhandle; TAILQ_FOREACH(dhandle, &conn->dhqh, q) { @@ -1732,23 +1733,25 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) session->dhandle = NULL; if (dhandle->checkpoint == NULL) - (void)__wt_fprintf(fp, "%s(): \n", dhandle->name); + (void)__wt_fprintf(session, fh, + "%s(): \n", dhandle->name); else - (void)__wt_fprintf(fp, "%s(checkpoint=%s): \n", + (void)__wt_fprintf(session, fh, + "%s(checkpoint=%s): \n", dhandle->name, dhandle->checkpoint); if (intl_pages != 0) - (void)__wt_fprintf(fp, "\t" "internal pages: " - "%" PRIu64 " pages, %" PRIu64 + (void)__wt_fprintf(session, fh, + "\t" "internal pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", intl_pages, max_intl_bytes, intl_bytes >> 20); if (leaf_pages != 0) - (void)__wt_fprintf(fp, "\t" "leaf pages: " - "%" PRIu64 " pages, %" PRIu64 + (void)__wt_fprintf(session, fh, + "\t" "leaf pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", leaf_pages, max_leaf_bytes, leaf_bytes >> 
20); if (dirty_pages != 0) - (void)__wt_fprintf(fp, "\t" "dirty pages: " - "%" PRIu64 " pages, %" PRIu64 + (void)__wt_fprintf(session, fh, + "\t" "dirty pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", dirty_pages, max_dirty_bytes, dirty_bytes >> 20); @@ -1763,12 +1766,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) if (conn->cache->overhead_pct != 0) total_bytes += (total_bytes * (uint64_t)conn->cache->overhead_pct) / 100; - (void)__wt_fprintf(fp, "cache dump: total found = %" PRIu64 "MB" - " vs tracked inuse %" PRIu64 "MB\n", + (void)__wt_fprintf(session, fh, + "cache dump: total found = %" PRIu64 + "MB vs tracked inuse %" PRIu64 "MB\n", total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20); - (void)__wt_fprintf(fp, "==========\n"); - if (fp != stderr) - WT_RET(__wt_fclose(&fp, WT_FHANDLE_WRITE)); + (void)__wt_fprintf(session, fh, "==========\n"); + if (fh != WT_STDERR) + WT_RET(__wt_close(session, &fh)); return (0); } #endif diff --git a/src/include/connection.h b/src/include/connection.h index 2255056fcf6..7e74603d650 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -25,6 +25,36 @@ struct __wt_process { /* Locked: connection queue */ TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh; WT_CACHE_POOL *cache_pool; + + void *inmemory; /* In-memory configuration cookie */ + + /* + * OS library/system call jump table, to support in-memory and readonly + * configurations as well as special devices with other non-POSIX APIs. + */ +#define WT_JUMP(func, ...) 
__wt_process.func(__VA_ARGS__) + int (*j_directory_sync)(WT_SESSION_IMPL *, const char *path); + int (*j_file_exist)(WT_SESSION_IMPL *, const char *, bool *); + int (*j_file_remove)(WT_SESSION_IMPL *, const char *); + int (*j_file_rename)(WT_SESSION_IMPL *, const char *, const char *); + int (*j_file_size)( + WT_SESSION_IMPL *, const char *, bool, wt_off_t *); + int (*j_handle_advise)( + WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t, int); + int (*j_handle_close)(WT_SESSION_IMPL *, WT_FH *); + int (*j_handle_getc)(WT_SESSION_IMPL *, WT_FH *, int *); + int (*j_handle_lock)(WT_SESSION_IMPL *, WT_FH *, bool); + int (*j_handle_open)( + WT_SESSION_IMPL *, WT_FH *, const char *, int, u_int); + int (*j_handle_printf)( + WT_SESSION_IMPL *, WT_FH *, const char *, va_list); + int (*j_handle_read)( + WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *); + int (*j_handle_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); + int (*j_handle_sync)(WT_SESSION_IMPL *, WT_FH *, bool); + int (*j_handle_truncate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t); + int (*j_handle_write)( + WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, const void *); }; extern WT_PROCESS __wt_process; @@ -333,7 +363,7 @@ struct __wt_connection_impl { bool stat_tid_set; /* Statistics log thread set */ WT_CONDVAR *stat_cond; /* Statistics log wait mutex */ const char *stat_format; /* Statistics log timestamp format */ - FILE *stat_fp; /* Statistics log file handle */ + WT_FH *stat_fh; /* Statistics log file handle */ char *stat_path; /* Statistics log path format */ char **stat_sources; /* Statistics log list of objects */ const char *stat_stamp; /* Statistics log entry timestamp */ diff --git a/src/include/cursor.h b/src/include/cursor.h index 2665ff83df3..559ed6682c5 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -67,7 +67,7 @@ struct __wt_cursor_backup { WT_CURSOR iface; size_t next; /* Cursor position */ - FILE *bfp; /* Backup file */ + WT_FH *bfh; /* Backup file */ uint32_t maxid; /* Maximum log 
file ID seen */ WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. */ diff --git a/src/include/extern.h b/src/include/extern.h index 55b0b8cd7ff..0f9f702dfb9 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -374,23 +374,23 @@ extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep); extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value); extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep); -extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno); extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop); -extern int __wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop); extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp); -extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int 
__wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value); extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep); -extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key); extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp); -extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode); extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep); -extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); -extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); +extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); 
extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced); extern int __wt_log_slot_new(WT_SESSION_IMPL *session); @@ -496,20 +496,17 @@ extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); extern int __wt_errno(void); extern int __wt_map_error_rdonly(int error); extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); -extern int __wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp); extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh); extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); -extern int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep); -extern int __wt_filesize_name(WT_SESSION_IMPL *session, const char *filename, bool silent, wt_off_t *sizep); -extern int __wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock); extern int __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh); extern int __wt_directory_sync(WT_SESSION_IMPL *session, const char *path); -extern int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len); extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); -extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp); +extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); +extern int __wt_os_init(WT_SESSION_IMPL *session); +extern int __wt_os_cleanup(WT_SESSION_IMPL *session); +extern int __wt_os_inmemory(WT_SESSION_IMPL *session); +extern int __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session); extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void 
*mapp, size_t *lenp, void **mappingcookie); extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); @@ -527,22 +524,18 @@ extern int __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); extern int __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp); extern int __wt_once(void (*init_routine)(void)); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp); +extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp); +extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, int dio_type, u_int flags, WT_FH **fhp); extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); +extern int __wt_close_connection_close(WT_SESSION_IMPL *session); extern int __wt_get_vm_pagesize(void); extern bool __wt_absolute_path(const char *path); extern const char *__wt_path_separator(void); +extern int __wt_os_posix(WT_SESSION_IMPL *session); +extern int __wt_os_posix_cleanup(WT_SESSION_IMPL *session); extern bool __wt_has_priv(void); -extern int __wt_remove(WT_SESSION_IMPL *session, const char *name); -extern int __wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to); -extern int __wt_read( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf); -extern int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, const void *buf); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); -extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, WT_FHANDLE_MODE mode_flag, u_int flags, FILE **fpp); -extern int __wt_vfprintf(FILE *fp, const char *fmt, va_list ap); -extern int __wt_fprintf(FILE *fp, 
const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))); -extern int __wt_fflush(FILE *fp); -extern int __wt_fclose(FILE **fpp, WT_FHANDLE_MODE mode_flag); extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); @@ -658,8 +651,7 @@ extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); -extern int __wt_fh_sync_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); -extern int __wt_sync_fp_and_rename( WT_SESSION_IMPL *session, FILE **fpp, const char *from, const char *to); +extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); extern int __wt_library_init(void); extern int __wt_breakpoint(void); extern void __wt_attach(WT_SESSION_IMPL *session); @@ -753,7 +745,7 @@ extern int __wt_txn_checkpoint_logread( WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp); extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop); extern int __wt_txn_truncate_end(WT_SESSION_IMPL *session); -extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags); +extern int __wt_txn_printlog(WT_SESSION *wt_session, uint32_t flags); extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]); extern int 
__wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval); diff --git a/src/include/flags.h b/src/include/flags.h index a6f42a9938f..ac2d1f20ce5 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -28,7 +28,7 @@ #define WT_FILE_TYPE_DATA 0x00000002 #define WT_FILE_TYPE_DIRECTORY 0x00000004 #define WT_FILE_TYPE_LOG 0x00000008 -#define WT_FILE_TYPE_TURTLE 0x00000010 +#define WT_FILE_TYPE_REGULAR 0x00000010 #define WT_LOGSCAN_FIRST 0x00000001 #define WT_LOGSCAN_FROM_CKP 0x00000002 #define WT_LOGSCAN_ONE 0x00000004 diff --git a/src/include/os.h b/src/include/os.h index fbba7f05f88..863143387b3 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -6,29 +6,6 @@ * See the file LICENSE for redistribution information. */ -/* - * FILE handle close/open configuration. - */ -typedef enum { - WT_FHANDLE_APPEND, WT_FHANDLE_READ, WT_FHANDLE_WRITE -} WT_FHANDLE_MODE; - -#ifdef _WIN32 -/* - * Open in binary (untranslated) mode; translations involving carriage-return - * and linefeed characters are suppressed. - */ -#define WT_FOPEN_APPEND "ab" -#define WT_FOPEN_READ "rb" -#define WT_FOPEN_WRITE "wb" -#else -#define WT_FOPEN_APPEND "a" -#define WT_FOPEN_READ "r" -#define WT_FOPEN_WRITE "w" -#endif - -#define WT_FOPEN_FIXED 0x1 /* Path isn't relative to home */ - /* * Number of directory entries can grow dynamically. */ @@ -81,6 +58,17 @@ typedef enum { (t1).tv_nsec < (t2).tv_nsec ? -1 : \ (t1).tv_nsec == (t2).tv_nsec ? 
0 : 1 : 1) +#define WT_OPEN_CREATE 0x001 /* Create is OK */ +#define WT_OPEN_EXCLUSIVE 0x002 /* Exclusive open */ +#define WT_OPEN_FIXED 0x004 /* Path isn't relative to home */ +#define WT_OPEN_READONLY 0x008 /* Readonly open */ +#define WT_STREAM_APPEND 0x010 /* Open a stream: append */ +#define WT_STREAM_READ 0x020 /* Open a stream: read */ +#define WT_STREAM_WRITE 0x040 /* Open a stream: write */ + +#define WT_STDERR ((void *)0x1) /* WT_FH to stderr */ +#define WT_STDOUT ((void *)0x2) /* WT_FH to stdout */ + struct __wt_fh { char *name; /* File name */ uint64_t name_hash; /* Hash of name */ @@ -89,6 +77,10 @@ struct __wt_fh { u_int ref; /* Reference count */ + /* + * Underlying file system handle support. + */ + FILE *fp; /* ANSI C file handle */ #ifndef _WIN32 int fd; /* POSIX file handle */ #else @@ -100,6 +92,12 @@ struct __wt_fh { wt_off_t extend_size; /* File extended size */ wt_off_t extend_len; /* File extend chunk size */ + /* + * Underlying in-memory handle support. + */ + wt_off_t off; /* Read/write offset */ + WT_ITEM buf; /* Data */ + bool direct_io; /* O_DIRECT configured */ enum { /* file extend configuration */ @@ -109,4 +107,173 @@ struct __wt_fh { WT_FALLOCATE_STD, WT_FALLOCATE_SYS } fallocate_available; bool fallocate_requires_locking; + +#define WT_FH_IN_MEMORY 0x01 /* In-memory, don't remove */ +#define WT_FH_FLUSH_ON_CLOSE 0x02 /* Flush when closing */ + uint32_t flags; }; + +/* + * OS calls that are currently just stubs. + */ +/* + * __wt_exist -- + * Return if the file exists. + */ +static inline int +__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + return (WT_JUMP(j_file_exist, session, name, existp)); +} + +/* + * __wt_posix_fadvise -- + * POSIX fadvise. 
+ */ +static inline int +__wt_posix_fadvise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ +#if defined(HAVE_POSIX_FADVISE) + return (WT_JUMP(j_handle_advise, session, fh, offset, len, advice)); +#else + return (0); +#endif +} + +/* + * __wt_file_lock -- + * Lock/unlock a file. + */ +static inline int +__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) +{ + return (WT_JUMP(j_handle_lock, session, fh, lock)); +} + +/* + * __wt_filesize -- + * Get the size of a file in bytes, by file handle. + */ +static inline int +__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + return (WT_JUMP(j_handle_size, session, fh, sizep)); +} + +/* + * __wt_filesize_name -- + * Get the size of a file in bytes, by file name. + */ +static inline int +__wt_filesize_name( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + return (WT_JUMP(j_file_size, session, name, silent, sizep)); +} + +/* + * __wt_fsync -- + * POSIX fflush/fsync. + */ +static inline int +__wt_fsync(WT_SESSION_IMPL *session, void *fh, bool wait) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (WT_JUMP(j_handle_sync, session, fh, wait)); +} + +/* + * __wt_ftruncate -- + * POSIX ftruncate. + */ +static inline int +__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (WT_JUMP(j_handle_truncate, session, fh, len)); +} + +/* + * __wt_read -- + * POSIX pread. + */ +static inline int +__wt_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + WT_STAT_FAST_CONN_INCR(session, read_io); + + return (WT_JUMP(j_handle_read, session, fh, offset, len, buf)); +} + +/* + * __wt_remove -- + * POSIX remove. 
+ */ +static inline int +__wt_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (WT_JUMP(j_file_remove, session, name)); +} + +/* + * __wt_rename -- + * POSIX rename. + */ +static inline int +__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (WT_JUMP(j_file_rename, session, from, to)); +} + +/* + * __wt_write -- + * POSIX pwrite. + */ +static inline int +__wt_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || + WT_STRING_MATCH(fh->name, + WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); + + WT_STAT_FAST_CONN_INCR(session, write_io); + + return (WT_JUMP(j_handle_write, session, fh, offset, len, buf)); +} + +/* + * __wt_vfprintf -- + * ANSI C vfprintf. + */ +static inline int +__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + return (WT_JUMP(j_handle_printf, session, fh, fmt, ap)); +} + +/* + * __wt_fprintf -- + * ANSI C fprintf. + */ +static inline int +__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...) 
+ WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) +{ + WT_DECL_RET; + va_list ap; + + va_start(ap, fmt); + ret = __wt_vfprintf(session, fh, fmt, ap); + va_end(ap); + + return (ret); +} diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 4533c8cbca0..2975920eb16 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -339,7 +339,6 @@ union __wt_rand_state; #include "log.h" #include "lsm.h" #include "meta.h" -#include "os.h" #include "schema.h" #include "txn.h" @@ -349,15 +348,16 @@ union __wt_rand_state; #include "extern.h" #include "verify_build.h" -#include "buf.i" -#include "misc.i" #include "intpack.i" /* required by cell.i, packing.i */ -#include "packing.i" + +#include "buf.i" #include "cache.i" /* required by txn.i */ #include "cell.i" /* required by btree.i */ - #include "log.i" +#include "misc.i" #include "mutex.i" /* required by btree.i */ +#include "os.h" /* requires connection.h */ +#include "packing.i" #include "txn.i" /* required by btree.i */ #include "btree.i" /* required by cursor.i */ diff --git a/src/log/log.c b/src/log/log.c index 03145d8408c..4c32a547981 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -132,7 +132,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync %s to LSN %d/%lu", log->log_fh->name, min_lsn->l.file, min_lsn->l.offset)); - WT_ERR(__wt_fsync(session, log->log_fh)); + WT_ERR(__wt_fsync(session, log->log_fh, true)); log->sync_lsn = *min_lsn; WT_STAT_FAST_CONN_INCR(session, log_sync); WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); @@ -641,7 +641,7 @@ __log_file_header( /* * Make sure the header gets to disk. 
*/ - WT_ERR(__wt_fsync(session, tmp.slot_fh)); + WT_ERR(__wt_fsync(session, tmp.slot_fh, true)); if (end_lsn != NULL) *end_lsn = tmp.slot_end_lsn; @@ -655,7 +655,7 @@ err: __wt_scr_free(session, &buf); */ static int __log_openfile(WT_SESSION_IMPL *session, - bool ok_create, WT_FH **fh, const char *file_prefix, uint32_t id) + bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id) { WT_DECL_ITEM(buf); WT_DECL_RET; @@ -673,8 +673,8 @@ __log_openfile(WT_SESSION_IMPL *session, WT_ERR(__log_filename(session, id, file_prefix, buf)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "opening log %s", (const char *)buf->data)); - WT_ERR(__wt_open( - session, buf->data, ok_create, false, WT_FILE_TYPE_LOG, fh)); + WT_ERR(__wt_open(session, buf->data, + WT_FILE_TYPE_LOG, ok_create ? WT_OPEN_CREATE : 0, fhp)); /* * If we are not creating the log file but opening it for reading, * check that the magic number and versions are correct. @@ -682,7 +682,7 @@ __log_openfile(WT_SESSION_IMPL *session, if (!ok_create) { WT_ERR(__wt_buf_grow(session, buf, allocsize)); memset(buf->mem, 0, allocsize); - WT_ERR(__wt_read(session, *fh, 0, allocsize, buf->mem)); + WT_ERR(__wt_read(session, *fhp, 0, allocsize, buf->mem)); logrec = (WT_LOG_RECORD *)buf->mem; __wt_log_record_byteswap(logrec); desc = (WT_LOG_DESC *)logrec->record; @@ -690,7 +690,7 @@ __log_openfile(WT_SESSION_IMPL *session, if (desc->log_magic != WT_LOG_MAGIC) WT_PANIC_RET(session, WT_ERROR, "log file %s corrupted: Bad magic number %" PRIu32, - (*fh)->name, desc->log_magic); + (*fhp)->name, desc->log_magic); if (desc->majorv > WT_LOG_MAJOR_VERSION || (desc->majorv == WT_LOG_MAJOR_VERSION && desc->minorv > WT_LOG_MINOR_VERSION)) @@ -850,7 +850,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) * the LSNs since we're the only write in progress. 
*/ if (conn_open) { - WT_RET(__wt_fsync(session, log->log_fh)); + WT_RET(__wt_fsync(session, log->log_fh, true)); log->sync_lsn = end_lsn; log->write_lsn = end_lsn; log->write_start_lsn = end_lsn; @@ -946,7 +946,7 @@ __log_truncate(WT_SESSION_IMPL *session, WT_ERR(__log_openfile(session, false, &log_fh, file_prefix, lsn->l.file)); WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset)); - WT_ERR(__wt_fsync(session, log_fh)); + WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); /* @@ -969,7 +969,7 @@ __log_truncate(WT_SESSION_IMPL *session, */ WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD)); - WT_ERR(__wt_fsync(session, log_fh)); + WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); } } @@ -1019,7 +1019,7 @@ __wt_log_allocfile( WT_ERR(__log_file_header(session, log_fh, NULL, true)); WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD)); WT_ERR(__log_prealloc(session, log_fh)); - WT_ERR(__wt_fsync(session, log_fh)); + WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_prealloc: rename %s to %s", @@ -1086,7 +1086,7 @@ __wt_log_open(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_LOG, "log_open: open fh to directory %s", conn->log_path)); WT_RET(__wt_open(session, conn->log_path, - false, false, WT_FILE_TYPE_DIRECTORY, &log->log_dir_fh)); + WT_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); } if (!F_ISSET(conn, WT_CONN_READONLY)) { @@ -1174,14 +1174,14 @@ __wt_log_close(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_LOG, "closing old log %s", log->log_close_fh->name)); if (!F_ISSET(conn, WT_CONN_READONLY)) - WT_RET(__wt_fsync(session, log->log_close_fh)); + WT_RET(__wt_fsync(session, log->log_close_fh, true)); WT_RET(__wt_close(session, &log->log_close_fh)); } if (log->log_fh != NULL) { WT_RET(__wt_verbose(session, WT_VERB_LOG, "closing log %s", log->log_fh->name)); if (!F_ISSET(conn, 
WT_CONN_READONLY)) - WT_RET(__wt_fsync(session, log->log_fh)); + WT_RET(__wt_fsync(session, log->log_fh, true)); WT_RET(__wt_close(session, &log->log_fh)); log->log_fh = NULL; } @@ -1414,7 +1414,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) log->log_fh->name, sync_lsn.l.file, sync_lsn.l.offset)); WT_STAT_FAST_CONN_INCR(session, log_sync); - WT_ERR(__wt_fsync(session, log->log_fh)); + WT_ERR(__wt_fsync(session, log->log_fh, true)); log->sync_lsn = sync_lsn; WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); } diff --git a/src/log/log_auto.c b/src/log/log_auto.c index 54df01d01ab..1feace20e44 100644 --- a/src/log/log_auto.c +++ b/src/log/log_auto.c @@ -131,9 +131,8 @@ __wt_logop_col_put_unpack( } int -__wt_logop_col_put_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_col_put_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -145,17 +144,18 @@ __wt_logop_col_put_print( WT_RET(__wt_logop_col_put_unpack( session, pp, end, &fileid, &recno, &value)); - WT_RET(__wt_fprintf(out, " \"optype\": \"col_put\",\n")); - WT_ERR(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"optype\": \"col_put\",\n")); + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"recno\": \"%" PRIu64 "\",\n", recno)); WT_ERR(__logrec_make_json_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"value\": \"%s\"", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, ",\n \"value-hex\": \"%s\"", escaped)); } @@ -204,9 +204,8 @@ __wt_logop_col_remove_unpack( } int -__wt_logop_col_remove_print( - WT_SESSION_IMPL *session, const uint8_t 
**pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_col_remove_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { uint32_t fileid; uint64_t recno; @@ -215,10 +214,11 @@ __wt_logop_col_remove_print( WT_RET(__wt_logop_col_remove_unpack( session, pp, end, &fileid, &recno)); - WT_RET(__wt_fprintf(out, " \"optype\": \"col_remove\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"optype\": \"col_remove\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"recno\": \"%" PRIu64 "\"", recno)); return (0); } @@ -264,9 +264,8 @@ __wt_logop_col_truncate_unpack( } int -__wt_logop_col_truncate_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_col_truncate_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { uint32_t fileid; uint64_t start; @@ -276,12 +275,13 @@ __wt_logop_col_truncate_print( WT_RET(__wt_logop_col_truncate_unpack( session, pp, end, &fileid, &start, &stop)); - WT_RET(__wt_fprintf(out, " \"optype\": \"col_truncate\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"optype\": \"col_truncate\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"start\": \"%" PRIu64 "\",\n", start)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"stop\": \"%" PRIu64 "\"", stop)); return (0); } @@ -327,9 +327,8 @@ __wt_logop_row_put_unpack( } int -__wt_logop_row_put_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_row_put_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ 
-341,23 +340,24 @@ __wt_logop_row_put_print( WT_RET(__wt_logop_row_put_unpack( session, pp, end, &fileid, &key, &value)); - WT_RET(__wt_fprintf(out, " \"optype\": \"row_put\",\n")); - WT_ERR(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"optype\": \"row_put\",\n")); + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"key\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"key-hex\": \"%s\",\n", escaped)); } WT_ERR(__logrec_make_json_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"value\": \"%s\"", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, ",\n \"value-hex\": \"%s\"", escaped)); } @@ -406,9 +406,8 @@ __wt_logop_row_remove_unpack( } int -__wt_logop_row_remove_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_row_remove_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -419,15 +418,16 @@ __wt_logop_row_remove_print( WT_RET(__wt_logop_row_remove_unpack( session, pp, end, &fileid, &key)); - WT_RET(__wt_fprintf(out, " \"optype\": \"row_remove\",\n")); - WT_ERR(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"optype\": \"row_remove\",\n")); + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"key\": \"%s\"", escaped)); if 
(LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, ",\n \"key-hex\": \"%s\"", escaped)); } @@ -476,9 +476,8 @@ __wt_logop_row_truncate_unpack( } int -__wt_logop_row_truncate_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_row_truncate_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -491,26 +490,27 @@ __wt_logop_row_truncate_print( WT_RET(__wt_logop_row_truncate_unpack( session, pp, end, &fileid, &start, &stop, &mode)); - WT_RET(__wt_fprintf(out, " \"optype\": \"row_truncate\",\n")); - WT_ERR(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"optype\": \"row_truncate\",\n")); + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &start)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"start\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &start)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"start-hex\": \"%s\",\n", escaped)); } WT_ERR(__logrec_make_json_str(session, &escaped, &stop)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"stop\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &stop)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"stop-hex\": \"%s\",\n", escaped)); } - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT, " \"mode\": \"%" PRIu32 "\"", mode)); err: __wt_free(session, escaped); @@ -518,9 +518,8 @@ err: __wt_free(session, escaped); } int -__wt_txn_op_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) 
+__wt_txn_op_printlog(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { uint32_t optype, opsize; @@ -530,33 +529,27 @@ __wt_txn_op_printlog( switch (optype) { case WT_LOGOP_COL_PUT: - WT_RET(__wt_logop_col_put_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_col_put_print(session, pp, end, flags)); break; case WT_LOGOP_COL_REMOVE: - WT_RET(__wt_logop_col_remove_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_col_remove_print(session, pp, end, flags)); break; case WT_LOGOP_COL_TRUNCATE: - WT_RET(__wt_logop_col_truncate_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_col_truncate_print(session, pp, end, flags)); break; case WT_LOGOP_ROW_PUT: - WT_RET(__wt_logop_row_put_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_row_put_print(session, pp, end, flags)); break; case WT_LOGOP_ROW_REMOVE: - WT_RET(__wt_logop_row_remove_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_row_remove_print(session, pp, end, flags)); break; case WT_LOGOP_ROW_TRUNCATE: - WT_RET(__wt_logop_row_truncate_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_row_truncate_print(session, pp, end, flags)); break; WT_ILLEGAL_VALUE(session); diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index 471bb65cac0..35b6b68ec14 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -68,27 +68,27 @@ __metadata_init(WT_SESSION_IMPL *session) static int __metadata_load_hot_backup(WT_SESSION_IMPL *session) { - FILE *fp; WT_DECL_ITEM(key); WT_DECL_ITEM(value); WT_DECL_RET; + WT_FH *fh; bool exist; /* Look for a hot backup file: if we find it, load it. */ WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist)); if (!exist) return (0); - WT_RET(__wt_fopen(session, - WT_METADATA_BACKUP, WT_FHANDLE_READ, 0, &fp)); + WT_RET(__wt_open(session, + WT_METADATA_BACKUP, WT_FILE_TYPE_REGULAR, WT_STREAM_READ, &fh)); /* Read line pairs and load them into the metadata file. 
*/ WT_ERR(__wt_scr_alloc(session, 512, &key)); WT_ERR(__wt_scr_alloc(session, 512, &value)); for (;;) { - WT_ERR(__wt_getline(session, key, fp)); + WT_ERR(__wt_getline(session, key, fh)); if (key->size == 0) break; - WT_ERR(__wt_getline(session, value, fp)); + WT_ERR(__wt_getline(session, value, fh)); if (value->size == 0) WT_ERR(__wt_illegal_value(session, WT_METADATA_BACKUP)); WT_ERR(__wt_metadata_update(session, key->data, value->data)); @@ -96,7 +96,7 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session) F_SET(S2C(session), WT_CONN_WAS_BACKUP); -err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_READ)); +err: WT_TRET(__wt_close(session, &fh)); __wt_scr_free(session, &key); __wt_scr_free(session, &value); return (ret); @@ -240,9 +240,9 @@ err: __wt_free(session, metaconf); int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) { - FILE *fp; WT_DECL_ITEM(buf); WT_DECL_RET; + WT_FH *fh; bool exist, match; *valuep = NULL; @@ -257,20 +257,20 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) if (!exist) return (strcmp(key, WT_METAFILE_URI) == 0 ? __metadata_config(session, valuep) : WT_NOTFOUND); - WT_RET(__wt_fopen(session, - WT_METADATA_TURTLE, WT_FHANDLE_READ, 0, &fp)); + WT_RET(__wt_open(session, + WT_METADATA_TURTLE, WT_FILE_TYPE_REGULAR, WT_STREAM_READ, &fh)); /* Search for the key. */ WT_ERR(__wt_scr_alloc(session, 512, &buf)); for (match = false;;) { - WT_ERR(__wt_getline(session, buf, fp)); + WT_ERR(__wt_getline(session, buf, fh)); if (buf->size == 0) WT_ERR(WT_NOTFOUND); if (strcmp(key, buf->data) == 0) match = true; /* Key matched: read the subsequent line for the value. */ - WT_ERR(__wt_getline(session, buf, fp)); + WT_ERR(__wt_getline(session, buf, fh)); if (buf->size == 0) WT_ERR(__wt_illegal_value(session, WT_METADATA_TURTLE)); if (match) @@ -280,7 +280,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) /* Copy the value for the caller. 
*/ WT_ERR(__wt_strdup(session, buf->data, valuep)); -err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_READ)); +err: WT_TRET(__wt_close(session, &fh)); __wt_scr_free(session, &buf); return (ret); } @@ -304,8 +304,8 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) * Create the turtle setup file: we currently re-write it from scratch * every time. */ - WT_RET(__wt_open(session, - WT_METADATA_TURTLE_SET, true, true, WT_FILE_TYPE_TURTLE, &fh)); + WT_RET(__wt_open(session, WT_METADATA_TURTLE_SET, + WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)); version = wiredtiger_version(&vmajor, &vminor, &vpatch); WT_ERR(__wt_scr_alloc(session, 2 * 1024, &buf)); @@ -317,7 +317,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) WT_ERR(__wt_write(session, fh, 0, buf->size, buf->data)); /* Flush the handle and rename the file into place. */ - ret = __wt_fh_sync_and_rename( + ret = __wt_sync_handle_and_rename( session, &fh, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE); /* Close any file handle left open, remove any temporary file. */ diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c index 83e77aa5312..d806c270860 100644 --- a/src/os_posix/os_dir.c +++ b/src/os_posix/os_dir.c @@ -27,6 +27,8 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, bool match; char **entries, *path; + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + *dirlist = NULL; *countp = 0; diff --git a/src/os_posix/os_exist.c b/src/os_posix/os_exist.c deleted file mode 100644 index 87f0e219d2e..00000000000 --- a/src/os_posix/os_exist.c +++ /dev/null @@ -1,38 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_exist -- - * Return if the file exists. 
- */ -int -__wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp) -{ - struct stat sb; - WT_DECL_RET; - char *path; - - *existp = false; - - WT_RET(__wt_filename(session, filename, &path)); - - WT_SYSCALL_RETRY(stat(path, &sb), ret); - - __wt_free(session, path); - - if (ret == 0) { - *existp = true; - return (0); - } - if (ret == ENOENT) - return (0); - - WT_RET_MSG(session, ret, "%s: fstat", filename); -} diff --git a/src/os_posix/os_fallocate.c b/src/os_posix/os_fallocate.c index bf20a99bdef..dbe5857e195 100644 --- a/src/os_posix/os_fallocate.c +++ b/src/os_posix/os_fallocate.c @@ -116,6 +116,8 @@ __wt_fallocate( WT_DECL_RET; WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + switch (fh->fallocate_available) { /* * Check for already configured handles and make the configured call. diff --git a/src/os_posix/os_filesize.c b/src/os_posix/os_filesize.c deleted file mode 100644 index 72242e351bf..00000000000 --- a/src/os_posix/os_filesize.c +++ /dev/null @@ -1,62 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_filesize -- - * Get the size of a file in bytes. - */ -int -__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fstat", fh->name)); - - WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); - if (ret == 0) { - *sizep = sb.st_size; - return (0); - } - - WT_RET_MSG(session, ret, "%s: fstat", fh->name); -} - -/* - * __wt_filesize_name -- - * Return the size of a file in bytes, given a file name. 
- */ -int -__wt_filesize_name(WT_SESSION_IMPL *session, - const char *filename, bool silent, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, filename, &path)); - - WT_SYSCALL_RETRY(stat(path, &sb), ret); - - __wt_free(session, path); - - if (ret == 0) { - *sizep = sb.st_size; - return (0); - } - - /* - * Some callers of this function expect failure if the file doesn't - * exist, and don't want an error message logged. - */ - if (!silent) - WT_RET_MSG(session, ret, "%s: fstat", filename); - return (ret); -} diff --git a/src/os_posix/os_flock.c b/src/os_posix/os_flock.c deleted file mode 100644 index e2056f7636c..00000000000 --- a/src/os_posix/os_flock.c +++ /dev/null @@ -1,38 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_bytelock -- - * Lock/unlock a byte in a file. - */ -int -__wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock) -{ - struct flock fl; - WT_DECL_RET; - - /* - * WiredTiger requires this function be able to acquire locks past - * the end of file. - * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - */ - fl.l_start = byte; - fl.l_len = 1; - fl.l_type = lock ? F_WRLCK : F_UNLCK; - fl.l_whence = SEEK_SET; - - WT_SYSCALL_RETRY(fcntl(fhp->fd, F_SETLK, &fl), ret); - - return (ret); -} diff --git a/src/os_posix/os_fsync.c b/src/os_posix/os_fsync.c index 0bd0359338b..9a08a337215 100644 --- a/src/os_posix/os_fsync.c +++ b/src/os_posix/os_fsync.c @@ -8,64 +8,21 @@ #include "wt_internal.h" -/* - * __wt_handle_sync -- - * Flush a file handle. 
- */ -static int -__wt_handle_sync(int fd) -{ - WT_DECL_RET; - -#if defined(F_FULLFSYNC) - /* - * OS X fsync documentation: - * "Note that while fsync() will flush all data from the host to the - * drive (i.e. the "permanent storage device"), the drive itself may - * not physically write the data to the platters for quite some time - * and it may be written in an out-of-order sequence. For applications - * that require tighter guarantees about the integrity of their data, - * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks - * the drive to flush all buffered data to permanent storage." - * - * OS X F_FULLFSYNC fcntl documentation: - * "This is currently implemented on HFS, MS-DOS (FAT), and Universal - * Disk Format (UDF) file systems." - */ - WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); - if (ret == 0) - return (0); - /* - * Assume F_FULLFSYNC failed because the file system doesn't support it - * and fallback to fsync. - */ -#endif -#if defined(HAVE_FDATASYNC) - WT_SYSCALL_RETRY(fdatasync(fd), ret); -#else - WT_SYSCALL_RETRY(fsync(fd), ret); -#endif - return (ret); -} - /* * __wt_directory_sync_fh -- - * Flush a directory file handle. We don't use __wt_fsync because - * most file systems don't require this step and we don't want to - * penalize them by calling fsync. + * Flush a directory file handle to ensure file creation is durable. + * + * We don't use fsync because most file systems don't require this step and + * we don't want to penalize them by calling fsync. 
*/ int __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) { -#ifdef __linux__ - WT_DECL_RET; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - if ((ret = __wt_handle_sync(fh->fd)) == 0) - return (0); - WT_RET_MSG(session, ret, "%s: fsync", fh->name); + +#ifdef __linux__ + return (WT_JUMP(j_handle_sync, session, fh, true))); #else - WT_UNUSED(session); WT_UNUSED(fh); return (0); #endif @@ -73,11 +30,13 @@ __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) /* * __wt_directory_sync -- - * Flush a directory to ensure a file creation is durable. + * Flush a directory to ensure file creation is durable. */ int __wt_directory_sync(WT_SESSION_IMPL *session, const char *path) { + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + #ifdef __linux__ WT_DECL_RET; int fd, tret; @@ -103,69 +62,13 @@ __wt_directory_sync(WT_SESSION_IMPL *session, const char *path) path = copy; } - WT_SYSCALL_RETRY(((fd = - open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); - __wt_free(session, copy); - if (ret != 0) - WT_RET_MSG(session, ret, "%s: open", path); + ret = WT_JUMP(j_directory_sync, session, path); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - if ((ret = __wt_handle_sync(fd)) != 0) - WT_ERR_MSG(session, ret, "%s: fsync", path); + __wt_free(session, copy); -err: WT_SYSCALL_RETRY(close(fd), tret); - if (tret != 0) - __wt_err(session, tret, "%s: close", path); - WT_TRET(tret); return (ret); #else - WT_UNUSED(session); WT_UNUSED(path); return (0); #endif } - -/* - * __wt_fsync -- - * Flush a file handle. 
- */ -int -__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fsync", fh->name)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - if ((ret = __wt_handle_sync(fh->fd)) == 0) - return (0); - WT_RET_MSG(session, ret, "%s fsync error", fh->name); -} - -/* - * __wt_fsync_async -- - * Flush a file handle and don't wait for the result. - */ -int -__wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh) -{ -#ifdef HAVE_SYNC_FILE_RANGE - WT_DECL_RET; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: sync_file_range", fh->name)); - - WT_SYSCALL_RETRY(sync_file_range(fh->fd, - (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: sync_file_range", fh->name); -#else - WT_UNUSED(session); - WT_UNUSED(fh); - return (0); -#endif -} diff --git a/src/os_posix/os_ftruncate.c b/src/os_posix/os_ftruncate.c deleted file mode 100644 index 94d6cba3bf5..00000000000 --- a/src/os_posix/os_ftruncate.c +++ /dev/null @@ -1,26 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_ftruncate -- - * Truncate a file. 
- */ -int -__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); - if (ret == 0) - return (0); - - WT_RET_MSG(session, ret, "%s ftruncate error", fh->name); -} diff --git a/src/os_posix/os_getline.c b/src/os_posix/os_getline.c index c0ca96852de..3542ef4e76e 100644 --- a/src/os_posix/os_getline.c +++ b/src/os_posix/os_getline.c @@ -20,7 +20,7 @@ * (so the caller's EOF marker is a returned line length of 0). */ int -__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp) +__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh) { int c; @@ -30,7 +30,11 @@ __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp) */ WT_RET(__wt_buf_init(session, buf, 100)); - while ((c = fgetc(fp)) != EOF) { + for (;;) { + WT_RET(WT_JUMP(j_handle_getc, session, fh, &c)); + if (c == EOF) + break; + /* Leave space for a trailing NUL. */ WT_RET(__wt_buf_extend(session, buf, buf->size + 2)); if (c == '\n') { @@ -40,8 +44,6 @@ __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp) } ((char *)buf->mem)[buf->size++] = (char)c; } - if (c == EOF && ferror(fp)) - WT_RET_MSG(session, __wt_errno(), "file read"); ((char *)buf->mem)[buf->size] = '\0'; diff --git a/src/os_posix/os_init.c b/src/os_posix/os_init.c new file mode 100644 index 00000000000..acba622b366 --- /dev/null +++ b/src/os_posix/os_init.c @@ -0,0 +1,31 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_os_init -- + * Initialize the OS layer. + */ +int +__wt_os_init(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ? + __wt_os_inmemory(session) : __wt_os_posix(session)); +} + +/* + * __wt_os_cleanup -- + * Clean up the OS layer. 
+ */ +int +__wt_os_cleanup(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ? + __wt_os_inmemory_cleanup(session) : __wt_os_posix_cleanup(session)); +} diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c new file mode 100644 index 00000000000..4b65daa23f3 --- /dev/null +++ b/src/os_posix/os_inmemory.c @@ -0,0 +1,451 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * In-memory information. + */ +typedef struct { + WT_SPINLOCK lock; +} WT_IM; + +/* + * __im_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static int +__im_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +} + +/* + * __im_file_exist -- + * Return if the file exists. + */ +static int +__im_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + *existp = __wt_handle_search(session, name, false, true, NULL, NULL); + return (0); +} + +/* + * __im_file_remove -- + * POSIX remove. + */ +static int +__im_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + WT_FH *fh; + + if (__wt_handle_search(session, name, true, true, NULL, &fh)) { + WT_ASSERT(session, fh->ref == 1); + + /* Force a discard of the handle. */ + F_CLR(fh, WT_FH_IN_MEMORY); + ret = __wt_close(session, &fh); + } + return (ret); +} + +/* + * __im_file_rename -- + * POSIX rename. + */ +static int +__im_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_FH *fh; + uint64_t bucket, hash; + char *to_name; + + conn = S2C(session); + + /* We'll need a copy of the target name. */ + WT_RET(__wt_strdup(session, to, &to_name)); + + __wt_spin_lock(session, &conn->fh_lock); + + /* Make sure the target name isn't active. 
*/ + hash = __wt_hash_city64(to, strlen(to)); + bucket = hash % WT_HASH_ARRAY_SIZE; + TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) + if (strcmp(to, fh->name) == 0) + WT_ERR(EPERM); + + /* Find the source name. */ + hash = __wt_hash_city64(from, strlen(from)); + bucket = hash % WT_HASH_ARRAY_SIZE; + TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) + if (strcmp(from, fh->name) == 0) + break; + if (fh == NULL) + WT_ERR(ENOENT); + + /* Remove source from the list. */ + WT_CONN_FILE_REMOVE(conn, fh, bucket); + + /* Swap the names. */ + __wt_free(session, fh->name); + fh->name = to_name; + to_name = NULL; + + /* Put source back on the list. */ + hash = __wt_hash_city64(to, strlen(to)); + bucket = hash % WT_HASH_ARRAY_SIZE; + WT_CONN_FILE_INSERT(conn, fh, bucket); + + if (0) { +err: __wt_free(session, to_name); + } + __wt_spin_unlock(session, &conn->fh_lock); + + return (ret); +} + +/* + * __im_file_size -- + * Get the size of a file in bytes, by file name. + */ +static int +__im_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + WT_DECL_RET; + WT_FH *fh; + WT_IM *im; + + WT_UNUSED(silent); + + im = __wt_process.inmemory; + __wt_spin_lock(session, &im->lock); + + if (__wt_handle_search(session, name, false, false, NULL, &fh)) { + *sizep = fh->buf.size; + __wt_handle_search_unlock(session); + } else + ret = ENOENT; + + __wt_spin_unlock(session, &im->lock); + return (ret); +} + +/* + * __im_handle_advise -- + * POSIX fadvise. + */ +static int +__im_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + return (0); +} + +/* + * __im_handle_close -- + * ANSI C close/fclose. + */ +static int +__im_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + __wt_buf_free(session, &fh->buf); + + return (0); +} + +/* + * __im_handle_getc -- + * ANSI C fgetc. 
+ */ +static int +__im_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + WT_IM *im; + + im = __wt_process.inmemory; + __wt_spin_lock(session, &im->lock); + + if (fh->off >= fh->buf.size) + *chp = EOF; + else + *chp = ((char *)fh->buf.data)[fh->off++]; + + __wt_spin_unlock(session, &im->lock); + return (0); +} + +/* + * __im_handle_lock -- + * Lock/unlock a file. + */ +static int +__im_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(lock); + return (0); +} + +/* + * __im_handle_open -- + * POSIX fopen/open. + */ +static int +__im_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *path, int dio_type, u_int flags) +{ + WT_IM *im; + + WT_UNUSED(path); + WT_UNUSED(dio_type); + WT_UNUSED(flags); + + fh->off = 0; + F_SET(fh, WT_FH_IN_MEMORY); + + return (0); +} + +/* + * __im_handle_printf -- + * ANSI C vfprintf. + */ +static int +__im_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + va_list ap_copy; + WT_DECL_ITEM(tmp); + WT_DECL_RET; + WT_IM *im; + size_t len; + + im = __wt_process.inmemory; + + if (fh == WT_STDERR || fh == WT_STDOUT) { + if (vfprintf(fh == WT_STDERR ? stderr : stdout, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, + "%s: vfprintf", fh == WT_STDERR ? "stderr" : "stdout"); + } + + /* Build the string we're writing. */ + WT_RET(__wt_scr_alloc(session, strlen(fmt) * 2 + 128, &tmp)); + for (;;) { + va_copy(ap_copy, ap); + len = (size_t)vsnprintf(tmp->mem, tmp->memsize, fmt, ap_copy); + if (len < tmp->memsize) { + tmp->data = tmp->mem; + tmp->size = len; + break; + } + WT_ERR(__wt_buf_extend(session, tmp, len + 1)); + } + + __wt_spin_lock(session, &im->lock); + + /* Grow the handle's buffer as necessary. */ + WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)(fh->off + len))); + + /* Copy the data into place and update the offset. 
*/ + memcpy((uint8_t *)fh->buf.mem + fh->off, tmp->data, len); + fh->off += len; + +err: __wt_spin_unlock(session, &im->lock); + + __wt_scr_free(session, &tmp); + return (ret); +} + +/* + * __im_handle_read -- + * POSIX pread. + */ +static int +__im_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + WT_DECL_RET; + WT_IM *im; + + im = __wt_process.inmemory; + __wt_spin_lock(session, &im->lock); + + if (offset < fh->buf.size) { + len = WT_MIN(len, (size_t)(fh->buf.size - offset)); + memcpy(buf, (uint8_t *)fh->buf.mem + offset, len); + fh->off = offset + len; + } else + ret = WT_ERROR; + + __wt_spin_unlock(session, &im->lock); + if (ret == 0) + return (0); + WT_RET_MSG(session, WT_ERROR, + "%s read error: failed to read %" WT_SIZET_FMT + " bytes at offset %" PRIuMAX, + fh->name, len, (uintmax_t)offset); +} + +/* + * __im_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + WT_UNUSED(session); + + *sizep = fh->buf.size; + return (0); +} + +/* + * __im_handle_sync -- + * POSIX fflush/fsync. + */ +static int +__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool wait) +{ + WT_UNUSED(wait); + + /* Flush any stream's stdio buffers. */ + if (fh == WT_STDERR || fh == WT_STDOUT) { + if (fflush(fh == WT_STDERR ? stderr : stdout) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), + "%s: fflush", fh == WT_STDERR ? "stderr" : "stdout"); + } + return (0); +} + +/* + * __im_handle_truncate -- + * POSIX ftruncate. 
+ */ +static int +__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + WT_IM *im; + + im = __wt_process.inmemory; + __wt_spin_lock(session, &im->lock); + + WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)len)); + memset((uint8_t *) + fh->buf.mem + fh->buf.size, 0, fh->buf.memsize - fh->buf.size); + +err: __wt_spin_unlock(session, &im->lock); + return (ret); +} + +/* + * __im_handle_write -- + * POSIX pwrite. + */ +static int +__im_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + WT_DECL_RET; + WT_IM *im; + + im = __wt_process.inmemory; + __wt_spin_lock(session, &im->lock); + + WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)(offset + len + 1024))); + + memcpy((uint8_t *)fh->buf.data + offset, buf, len); + if (offset + len > fh->buf.size) + fh->buf.size = (size_t)(offset + len); + fh->off = offset + len; + +err: __wt_spin_unlock(session, &im->lock); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, + "%s write error: failed to write %" WT_SIZET_FMT + " bytes at offset %" PRIuMAX, + fh->name, len, (uintmax_t)offset); +} + +/* + * __wt_os_inmemory -- + * Initialize an in-memory configuration. + */ +int +__wt_os_inmemory(WT_SESSION_IMPL *session) +{ + WT_DECL_RET; + WT_IM *im; + + im = NULL; + + /* Allocate an in-memory structure. */ + WT_RET(__wt_calloc_one(session, &im)); + WT_ERR(__wt_spin_init(session, &im->lock, "in-memory I/O")); + + /* Initialize the in-memory jump table. 
*/ + __wt_process.j_directory_sync = __im_directory_sync; + __wt_process.j_file_exist = __im_file_exist; + __wt_process.j_file_remove = __im_file_remove; + __wt_process.j_file_rename = __im_file_rename; + __wt_process.j_file_size = __im_file_size; + __wt_process.j_handle_advise = __im_handle_advise; + __wt_process.j_handle_close = __im_handle_close; + __wt_process.j_handle_getc = __im_handle_getc; + __wt_process.j_handle_lock = __im_handle_lock; + __wt_process.j_handle_open = __im_handle_open; + __wt_process.j_handle_printf = __im_handle_printf; + __wt_process.j_handle_read = __im_handle_read; + __wt_process.j_handle_size = __im_handle_size; + __wt_process.j_handle_sync = __im_handle_sync; + __wt_process.j_handle_truncate = __im_handle_truncate; + __wt_process.j_handle_write = __im_handle_write; + + __wt_process.inmemory = im; + return (0); + +err: __wt_free(session, im); + return (ret); +} + +/* + * __wt_os_inmemory_cleanup -- + * Discard an in-memory configuration. + */ +int +__wt_os_inmemory_cleanup(WT_SESSION_IMPL *session) +{ + WT_DECL_RET; + WT_IM *im; + + if ((im = __wt_process.inmemory) == NULL) + return (0); + __wt_process.inmemory = NULL; + + __wt_spin_destroy(session, &im->lock); + + __wt_free(session, im); + + return (ret); +} diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 42aeeac4a5e..74fbce2e961 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -21,6 +21,8 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_UNUSED(mappingcookie); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + /* * Record the current size and only map and set that as the length, it * could change between the map call and when we set the return length. @@ -57,10 +59,16 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) { #ifdef HAVE_POSIX_MADVISE /* Linux requires the address be aligned to a 4KB boundary. 
*/ - WT_CONNECTION_IMPL *conn = S2C(session); - WT_BM *bm = S2BT(session)->bm; + WT_CONNECTION_IMPL *conn; + WT_BM *bm; WT_DECL_RET; - void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); + void *blk; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + conn = S2C(session); + bm = S2BT(session)->bm; + blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); /* XXX proxy for "am I doing a scan?" -- manual read-ahead */ @@ -100,9 +108,14 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size) { #ifdef HAVE_POSIX_MADVISE /* Linux requires the address be aligned to a 4KB boundary. */ - WT_CONNECTION_IMPL *conn = S2C(session); + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); + void *blk; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + conn = S2C(session); + blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) != 0) @@ -125,6 +138,8 @@ __wt_munmap(WT_SESSION_IMPL *session, { WT_UNUSED(mappingcookie); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: unmap %p: %" WT_SIZET_FMT " bytes", fh->name, map, len)); diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 219b26c2fa1..5cf0ddf0067 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -9,19 +9,73 @@ #include "wt_internal.h" /* - * __open_directory -- - * Open up a file handle to a directory. + * __wt_handle_search -- + * Search for a matching handle. 
*/ -static int -__open_directory(WT_SESSION_IMPL *session, char *path, int *fd) +bool +__wt_handle_search(WT_SESSION_IMPL *session, const char *name, + bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp) { - WT_DECL_RET; + WT_CONNECTION_IMPL *conn; + WT_FH *fh; + uint64_t bucket, hash; + bool found; - WT_SYSCALL_RETRY(((*fd = - open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_RET_MSG(session, ret, "%s: open_directory", path); - return (ret); + if (fhp != NULL) + *fhp = NULL; + + conn = S2C(session); + found = false; + + hash = __wt_hash_city64(name, strlen(name)); + bucket = hash % WT_HASH_ARRAY_SIZE; + + __wt_spin_lock(session, &conn->fh_lock); + + /* + * If we already have the file open, optionally increment the reference + * count and return a pointer. + */ + TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) + if (strcmp(name, fh->name) == 0) { + if (increment_ref) + ++fh->ref; + if (fhp != NULL) + *fhp = fh; + found = true; + break; + } + + /* If we don't find a match, optionally add a new entry. */ + if (!found && newfh != NULL) { + newfh->name_hash = hash; + WT_CONN_FILE_INSERT(conn, newfh, bucket); + (void)__wt_atomic_add32(&conn->open_file_count, 1); + + if (increment_ref) + ++newfh->ref; + if (fhp != NULL) + *fhp = newfh; + } + + /* + * Our caller may be operating on the handle itself, optionally leave + * the list locked. + */ + if (unlock) + __wt_spin_unlock(session, &conn->fh_lock); + + return (found); +} + +/* + * __wt_handle_search_unlock -- + * Release handle lock. 
+ */ +void +__wt_handle_search_unlock(WT_SESSION_IMPL *session) +{ + __wt_spin_unlock(session, &S2C(session)->fh_lock); } /* @@ -30,180 +84,77 @@ __open_directory(WT_SESSION_IMPL *session, char *path, int *fd) */ int __wt_open(WT_SESSION_IMPL *session, - const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp) + const char *name, int dio_type, u_int flags, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_FH *fh, *tfh; - mode_t mode; - uint64_t bucket, hash; - int f, fd; - bool direct_io, matched; - char *path; + WT_FH *fh; + bool open_called; conn = S2C(session); - direct_io = false; + fh = NULL; - fd = -1; - path = NULL; + open_called = false; WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name)); - /* Increment the reference count if we already have the file open. */ - matched = false; - hash = __wt_hash_city64(name, strlen(name)); - bucket = hash % WT_HASH_ARRAY_SIZE; - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) { - if (strcmp(name, tfh->name) == 0) { - ++tfh->ref; - *fhp = tfh; - matched = true; - break; - } - } - __wt_spin_unlock(session, &conn->fh_lock); - if (matched) + /* Check if the handle is already open. */ + if (__wt_handle_search(session, name, true, true, NULL, &fh)) { + /* + * XXX + * The in-memory implementation has to reset the file offset + * when a file is re-opened (which obviously also depends on + * in-memory configurations never opening a file in more than + * one thread at a time). This needs to be fixed. + */ + if (F_ISSET(fh, WT_FH_IN_MEMORY) && fh->ref == 1) + fh->off = 0; + *fhp = fh; return (0); + } - WT_RET(__wt_filename(session, name, &path)); + /* Allocate a structure and set the name. */ + WT_ERR(__wt_calloc_one(session, &fh)); + WT_ERR(__wt_strdup(session, name, &fh->name)); - if (dio_type == WT_FILE_TYPE_DIRECTORY) { - WT_ERR(__open_directory(session, path, &fd)); - goto setupfh; - } + /* Configure fallocate/posix_fallocate calls. 
*/ + __wt_fallocate_config(session, fh); /* - * If this is a read-only connection, open all files read-only - * except the lock file. + * If this is a read-only connection, open all files read-only except + * the lock file. */ if (F_ISSET(conn, WT_CONN_READONLY) && - !WT_STRING_MATCH(name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))) - f = O_RDONLY; - else - f = O_RDWR; -#ifdef O_BINARY - /* Windows clones: we always want to treat the file as a binary. */ - f |= O_BINARY; -#endif -#ifdef O_CLOEXEC - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. - */ - f |= O_CLOEXEC; -#endif -#ifdef O_NOATIME - /* Avoid updating metadata for read-only workloads. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - f |= O_NOATIME; -#endif - - if (ok_create) { - WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || - WT_STRING_MATCH(name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - f |= O_CREAT; - if (exclusive) - f |= O_EXCL; - mode = 0666; - } else - mode = 0; - -#ifdef O_DIRECT - if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) { - f |= O_DIRECT; - direct_io = true; - } -#endif - if (dio_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) -#ifdef O_DSYNC - f |= O_DSYNC; -#elif defined(O_SYNC) - f |= O_SYNC; -#else - WT_ERR_MSG(session, ENOTSUP, - "Unsupported log sync mode requested"); -#endif - WT_SYSCALL_RETRY(((fd = open(path, f, mode)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_ERR_MSG(session, ret, - direct_io ? 
- "%s: open failed with direct I/O configured, some " - "filesystem types do not support direct I/O" : "%s", path); - -setupfh: -#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) + !WT_STRING_MATCH(name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))) + LF_SET(WT_OPEN_READONLY); + /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. There's an obvious - * race here, so we prefer the flag to open if available. + * The only file created in read-only mode is the lock file. */ - if ((f = fcntl(fd, F_GETFD)) == -1 || - fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) - WT_ERR_MSG(session, __wt_errno(), "%s: fcntl", name); -#endif - -#if defined(HAVE_POSIX_FADVISE) - /* Disable read-ahead on trees: it slows down random read workloads. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM)); -#endif + WT_ASSERT(session, + !LF_ISSET(WT_OPEN_CREATE) || + !F_ISSET(conn, WT_CONN_READONLY) || + WT_STRING_MATCH(name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); - WT_ERR(__wt_calloc_one(session, &fh)); - WT_ERR(__wt_strdup(session, name, &fh->name)); - fh->name_hash = hash; - fh->fd = fd; - fh->ref = 1; - fh->direct_io = direct_io; + /* Call the underlying open function. */ + WT_ERR(WT_JUMP(j_handle_open, session, fh, name, dio_type, flags)); + open_called = true; /* Set the file's size. */ - WT_ERR(__wt_filesize(session, fh, &fh->size)); - - /* Configure file extension. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - fh->extend_len = conn->data_extend_len; - - /* Configure fallocate/posix_fallocate calls. */ - __wt_fallocate_config(session, fh); + WT_ERR(WT_JUMP(j_handle_size, session, fh, &fh->size)); /* - * Repeat the check for a match, but then link onto the database's list - * of files. 
+ * Repeat the check for a match: if there's no match, link our newly + * created handle onto the database's list of files. */ - matched = false; - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) { - if (strcmp(name, tfh->name) == 0) { - ++tfh->ref; - *fhp = tfh; - matched = true; - break; - } - } - if (!matched) { - WT_CONN_FILE_INSERT(conn, fh, bucket); - (void)__wt_atomic_add32(&conn->open_file_count, 1); - *fhp = fh; - } - __wt_spin_unlock(session, &conn->fh_lock); - if (matched) { -err: if (fh != NULL) { + if (__wt_handle_search(session, name, true, true, fh, fhp)) { +err: if (open_called) + WT_TRET(WT_JUMP(j_handle_close, session, fh)); + if (fh != NULL) { __wt_free(session, fh->name); __wt_free(session, fh); } - if (fd != -1) - (void)close(fd); } - - __wt_free(session, path); return (ret); } @@ -226,10 +177,21 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) fh = *fhp; *fhp = NULL; + /* Catch attempts to close the standard streams. */ + if (fh == WT_STDERR || fh == WT_STDOUT) + return (EINVAL); + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: close", fh->name)); + /* + * If the reference count hasn't gone to 0, or if it's an in-memory + * object, we're done. + * + * Assert the reference count is correct, but don't let it wrap. + */ __wt_spin_lock(session, &conn->fh_lock); - if (fh == NULL || fh->ref == 0 || --fh->ref > 0) { + WT_ASSERT(session, fh->ref > 0); + if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) { __wt_spin_unlock(session, &conn->fh_lock); return (0); } @@ -241,13 +203,43 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) __wt_spin_unlock(session, &conn->fh_lock); - /* Discard the memory. */ - if (close(fh->fd) != 0) { - ret = __wt_errno(); - __wt_err(session, ret, "close: %s", fh->name); - } + /* Discard underlying resources. 
*/ + ret = WT_JUMP(j_handle_close, session, fh); __wt_free(session, fh->name); __wt_free(session, fh); + + return (ret); +} + +/* + * __wt_close_connection_close -- + * Close any open file handles at connection close. + */ +int +__wt_close_connection_close(WT_SESSION_IMPL *session) +{ + WT_DECL_RET; + WT_FH *fh; + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + while ((fh = TAILQ_FIRST(&conn->fhqh)) != NULL) { + /* + * In-memory configurations will have open files, but the ref + * counts should be zero. + */ + if (!F_ISSET(conn, WT_CONN_IN_MEMORY) || fh->ref != 0) { + ret = EBUSY; + __wt_errx(session, + "Connection has open file handles: %s", fh->name); + } + + fh->ref = 1; + F_CLR(fh, WT_FH_IN_MEMORY); + + WT_TRET(__wt_close(session, &fh)); + } return (ret); } diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c new file mode 100644 index 00000000000..e3b39d69363 --- /dev/null +++ b/src/os_posix/os_posix.c @@ -0,0 +1,659 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __posix_sync -- + * Underlying support function to flush a file handle. + */ +static int +__posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, bool wait) +{ + WT_DECL_RET; + +#ifdef HAVE_SYNC_FILE_RANGE + if (!wait) { + WT_SYSCALL_RETRY(sync_file_range(fd, + (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: sync_file_range", name); + } +#else + if (!wait) + return (0); +#endif + +#if defined(F_FULLFSYNC) + /* + * OS X fsync documentation: + * "Note that while fsync() will flush all data from the host to the + * drive (i.e. the "permanent storage device"), the drive itself may + * not physically write the data to the platters for quite some time + * and it may be written in an out-of-order sequence. 
For applications + * that require tighter guarantees about the integrity of their data, + * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks + * the drive to flush all buffered data to permanent storage." + * + * OS X F_FULLFSYNC fcntl documentation: + * "This is currently implemented on HFS, MS-DOS (FAT), and Universal + * Disk Format (UDF) file systems." + */ + WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); + if (ret == 0) + return (0); + /* + * Assume F_FULLFSYNC failed because the file system doesn't support it + * and fallback to fsync. + */ +#endif +#if defined(HAVE_FDATASYNC) + WT_SYSCALL_RETRY(fdatasync(fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: fdatasync", name); +#else + WT_SYSCALL_RETRY(fsync(fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: fsync", name); +#endif +} + +/* + * __posix_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static int +__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ +#ifdef __linux__ + WT_DECL_RET; + int fd, tret; + + WT_SYSCALL_RETRY(( + (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); + if (ret != 0) + WT_RET_MSG(session, ret, "%s: open", path); + + ret = __posix_sync(session, fd, path, true); + + WT_SYSCALL_RETRY(close(fd), tret); + if (tret != 0) + __wt_err(session, tret, "%s: close", path); + return (ret == 0 ? tret : ret); +#else + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +#endif +} + +/* + * __posix_file_exist -- + * Return if the file exists. 
+ */ +static int +__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + struct stat sb; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + WT_SYSCALL_RETRY(stat(name, &sb), ret); + if (ret == 0) + *existp = true; + else if (ret == ENOENT) { + *existp = false; + ret = 0; + } else + __wt_err(session, ret, "%s: stat", name); + + __wt_free(session, path); + return (ret); +} + +/* + * __posix_file_remove -- + * POSIX remove. + */ +static int +__posix_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + char *path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, name, false, true, NULL, NULL)) + WT_RET_MSG( + session, EINVAL, "%s: remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + WT_SYSCALL_RETRY(remove(name), ret); + if (ret != 0) + __wt_err(session, ret, "%s: remove", path); + + __wt_free(session, path); + return (ret); +} + +/* + * __posix_file_rename -- + * POSIX rename. + */ +static int +__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + char *from_path, *to_path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, from, false, true, NULL, NULL)) + WT_RET_MSG( + session, EINVAL, "%s: rename: file has open handles", from); + if (__wt_handle_search(session, to, false, true, NULL, NULL)) + WT_RET_MSG( + session, EINVAL, "%s: rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + from = from_path; + WT_ERR(__wt_filename(session, to, &to_path)); + to = to_path; + + WT_SYSCALL_RETRY(rename(from, to), ret); + if (ret != 0) + __wt_err(session, ret, "%s to %s: rename", from, to); + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); +} + +/* + * __posix_file_size -- + * Get the size of a file in bytes, by file name. 
+ */ +static int +__posix_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + /* + * Optionally don't log errors on ENOENT; some callers of this function + * expect failure in that case and don't want an error message logged. + */ + WT_SYSCALL_RETRY(stat(name, &sb), ret); + if (ret == 0) + *sizep = sb.st_size; + else if (ret != ENOENT || !silent) + __wt_err(session, ret, "%s: stat", name); + + __wt_free(session, path); + + return (ret); +} + +/* + * __posix_handle_advise -- + * POSIX fadvise. + */ +static int +__posix_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ +#if defined(HAVE_POSIX_FADVISE) + WT_DECL_RET; + + WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: posix_fadvise", fh->name); +#else + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + return (0); +#endif +} + +/* + * __posix_handle_close -- + * ANSI C close/fclose. + */ +static int +__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_DECL_RET; + int tret; + + if (fh->fp == NULL) { + WT_SYSCALL_RETRY(close(fh->fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: close", fh->name); + } + + /* If the handle was opened for writing, flush the file. */ + if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, ret, "%s: fflush", fh->name); + } + + if ((tret = fclose(fh->fp)) != 0) { + tret = __wt_errno(); + __wt_err(session, tret, "%s: fclose", fh->name); + } + return (ret == 0 ? tret : ret); +} + +/* + * __posix_handle_getc -- + * ANSI C fgetc. 
+ */ +static int +__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, + ENOTSUP, "%s: getc: no stream configured", fh->name); + + *chp = fgetc(fh->fp); + if (*chp != EOF || !ferror(fh->fp)) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: getc", fh->name); +} + +/* + * __posix_handle_lock -- + * Lock/unlock a file. + */ +static int +__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + struct flock fl; + WT_DECL_RET; + + /* + * WiredTiger requires this function be able to acquire locks past + * the end of file. + * + * Note we're using fcntl(2) locking: all fcntl locks associated with a + * file for a given process are removed when any file descriptor for the + * file is closed by the process, even if a lock was never requested for + * that file descriptor. + */ + fl.l_start = 0; + fl.l_len = 1; + fl.l_type = lock ? F_WRLCK : F_UNLCK; + fl.l_whence = SEEK_SET; + + WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: fcntl", fh->name); +} + +/* + * __posix_handle_open -- + * POSIX fopen/open. + */ +static int +__posix_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *name, int dio_type, uint32_t flags) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + mode_t mode; + int f, fd, tret; + bool direct_io; + char *path, *stream_mode; + + conn = S2C(session); + direct_io = false; + + /* 0 is a legal file descriptor, set up error handling. */ + fh->fd = fd = -1; + + /* Create the path to the file. */ + path = NULL; + if (!LF_ISSET(WT_OPEN_FIXED)) { + WT_ERR(__wt_filename(session, name, &path)); + name = path; + } + + if (dio_type == WT_FILE_TYPE_DIRECTORY) { + WT_SYSCALL_RETRY(( + (fd = open(name, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); + if (ret == 0) + goto setupfh; + WT_ERR_MSG(session, ret, "%s: open", name); + } + + f = LF_ISSET(WT_OPEN_READONLY) ? 
O_RDONLY : O_RDWR; + if (LF_ISSET(WT_OPEN_CREATE)) { + f |= O_CREAT; + if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + f |= O_EXCL; + mode = 0666; + } else + mode = 0; + +#ifdef O_BINARY + /* Windows clones: we always want to treat the file as a binary. */ + f |= O_BINARY; +#endif +#ifdef O_CLOEXEC + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. + */ + f |= O_CLOEXEC; +#endif +#ifdef O_DIRECT + if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) { + f |= O_DIRECT; + direct_io = true; + } +#endif + fh->direct_io = direct_io; +#ifdef O_NOATIME + /* Avoid updating metadata for read-only workloads. */ + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) + f |= O_NOATIME; +#endif + + if (dio_type == WT_FILE_TYPE_LOG && + FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { +#ifdef O_DSYNC + f |= O_DSYNC; +#elif defined(O_SYNC) + f |= O_SYNC; +#else + WT_ERR_MSG(session, ENOTSUP, + "Unsupported log sync mode requested"); +#endif + } + + WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, + direct_io ? + "%s: open failed with direct I/O configured, some " + "filesystem types do not support direct I/O" : "%s", name); + +setupfh: +#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. There's an obvious + * race here, so we prefer the flag to open if available. + */ + if ((f = fcntl(fd, F_GETFD)) == -1 || + fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) + WT_ERR_MSG(session, __wt_errno(), "%s: fcntl", name); +#endif + + /* Disable read-ahead on trees: it slows down random read workloads. 
*/ +#if defined(HAVE_POSIX_FADVISE) + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) { + WT_SYSCALL_RETRY( + posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, "%s: posix_fadvise", name); + } +#endif + + /* Configure file extension. */ + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) + fh->extend_len = conn->data_extend_len; + + /* Optionally configure the stream API. */ + switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { + case WT_STREAM_APPEND: + stream_mode = "a"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case WT_STREAM_READ: + stream_mode = "r"; + break; + case WT_STREAM_WRITE: + stream_mode = "w"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case 0: + default: + stream_mode = NULL; + break; + } + if (stream_mode != NULL && (fh->fp = fdopen(fd, stream_mode)) == NULL) + WT_ERR_MSG(session, __wt_errno(), "%s: fopen", name); + + __wt_free(session, path); + fh->fd = fd; + return (0); + +err: if (fd != -1) { + WT_SYSCALL_RETRY(close(fd), tret); + if (tret != 0) + __wt_err(session, tret, "%s: close", name); + } + __wt_free(session, path); + fh->fd = -1; + fh->fp = NULL; + return (ret); +} + +/* + * __posix_handle_printf -- + * ANSI C vfprintf. + */ +static int +__posix_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (fh == WT_STDERR || fh == WT_STDOUT) { + if (vfprintf(fh == WT_STDERR ? stderr : stdout, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, + "%s: vfprintf", fh == WT_STDERR ? "stderr" : "stdout"); + } + + if (fh->fp == NULL) + WT_RET_MSG(session, ENOTSUP, + "%s: vfprintf: no stream configured", fh->name); + + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: vfprintf", fh->name); +} + +/* + * __posix_handle_read -- + * POSIX pread. 
+ */ +static int +__posix_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + size_t chunk; + ssize_t nr; + uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) + WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(), + "%s read error: failed to read %" WT_SIZET_FMT + " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __posix_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + + WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); + if (ret == 0) { + *sizep = sb.st_size; + return (0); + } + WT_RET_MSG(session, ret, "%s: fstat", fh->name); +} + +/* + * __posix_handle_sync -- + * POSIX fflush/fsync. + */ +static int +__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool wait) +{ + /* Flush any stream's stdio buffers. */ + if (fh == WT_STDERR || fh == WT_STDOUT) { + if (fflush(fh == WT_STDERR ? stderr : stdout) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), + "%s: fflush", fh == WT_STDERR ? "stderr" : "stdout"); + } + + if (fh->fp == NULL) + return (__posix_sync(session, fh->fd, fh->name, wait)); + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: fflush", fh->name); +} + +/* + * __posix_handle_truncate -- + * POSIX ftruncate. 
+ */ +static int +__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + + WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: ftruncate", fh->name); +} + +/* + * __posix_handle_write -- + * POSIX pwrite. + */ +static int +__posix_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + size_t chunk; + ssize_t nw; + const uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break writes larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) + WT_RET_MSG(session, __wt_errno(), + "%s write error: failed to write %" WT_SIZET_FMT + " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __wt_os_posix -- + * Initialize a POSIX configuration. + */ +int +__wt_os_posix(WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + /* Initialize the POSIX jump table. 
*/ + __wt_process.j_directory_sync = __posix_directory_sync; + __wt_process.j_file_exist = __posix_file_exist; + __wt_process.j_file_remove = __posix_file_remove; + __wt_process.j_file_rename = __posix_file_rename; + __wt_process.j_file_size = __posix_file_size; + __wt_process.j_handle_advise = __posix_handle_advise; + __wt_process.j_handle_close = __posix_handle_close; + __wt_process.j_handle_getc = __posix_handle_getc; + __wt_process.j_handle_lock = __posix_handle_lock; + __wt_process.j_handle_open = __posix_handle_open; + __wt_process.j_handle_printf = __posix_handle_printf; + __wt_process.j_handle_read = __posix_handle_read; + __wt_process.j_handle_size = __posix_handle_size; + __wt_process.j_handle_sync = __posix_handle_sync; + __wt_process.j_handle_truncate = __posix_handle_truncate; + __wt_process.j_handle_write = __posix_handle_write; + + return (0); +} + +/* + * __wt_os_posix_cleanup -- + * Discard a POSIX configuration. + */ +int +__wt_os_posix_cleanup(WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (0); +} diff --git a/src/os_posix/os_remove.c b/src/os_posix/os_remove.c deleted file mode 100644 index eb2e37fdc38..00000000000 --- a/src/os_posix/os_remove.c +++ /dev/null @@ -1,69 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __remove_file_check -- - * Check if the file is currently open before removing it. - */ -static void -__remove_file_check(WT_SESSION_IMPL *session, const char *name) -{ -#ifdef HAVE_DIAGNOSTIC - WT_CONNECTION_IMPL *conn; - WT_FH *fh; - uint64_t bucket; - - conn = S2C(session); - WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY)); - fh = NULL; - bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; - - /* - * Check if the file is open: it's an error if it is, since a higher - * level should have closed it before removing. 
- */ - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(name, fh->name) == 0) - break; - __wt_spin_unlock(session, &conn->fh_lock); - - WT_ASSERT(session, fh == NULL); -#else - WT_UNUSED(session); - WT_UNUSED(name); -#endif -} - -/* - * __wt_remove -- - * Remove a file. - */ -int -__wt_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - char *path; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: remove", name)); - - __remove_file_check(session, name); - - WT_RET(__wt_filename(session, name, &path)); - - WT_SYSCALL_RETRY(remove(path), ret); - - __wt_free(session, path); - - if (ret == 0 || ret == ENOENT) - return (0); - - WT_RET_MSG(session, ret, "%s: remove", name); -} diff --git a/src/os_posix/os_rename.c b/src/os_posix/os_rename.c deleted file mode 100644 index 8ec4ee3aa23..00000000000 --- a/src/os_posix/os_rename.c +++ /dev/null @@ -1,40 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_rename -- - * Rename a file. 
- */ -int -__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_DECL_RET; - char *from_path, *to_path; - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "rename %s to %s", from, to)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - from_path = to_path = NULL; - - WT_RET(__wt_filename(session, from, &from_path)); - WT_TRET(__wt_filename(session, to, &to_path)); - - if (ret == 0) - WT_SYSCALL_RETRY(rename(from_path, to_path), ret); - - __wt_free(session, from_path); - __wt_free(session, to_path); - - if (ret == 0) - return (0); - - WT_RET_MSG(session, ret, "rename %s to %s", from, to); -} diff --git a/src/os_posix/os_rw.c b/src/os_posix/os_rw.c deleted file mode 100644 index 3d49fa7e712..00000000000 --- a/src/os_posix/os_rw.c +++ /dev/null @@ -1,90 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_read -- - * Read a chunk. - */ -int -__wt_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - size_t chunk; - ssize_t nr; - uint8_t *addr; - - WT_STAT_FAST_CONN_INCR(session, read_io); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) - WT_RET_MSG(session, nr == 0 ? 
WT_ERROR : __wt_errno(), - "%s read error: failed to read %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __wt_write -- - * Write a chunk. - */ -int -__wt_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - size_t chunk; - ssize_t nw; - const uint8_t *addr; - - WT_STAT_FAST_CONN_INCR(session, write_io); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) - WT_RET_MSG(session, __wt_errno(), - "%s write error: failed to write %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} diff --git a/src/os_posix/os_stdio.c b/src/os_posix/os_stdio.c deleted file mode 100644 index 7ab107eda1e..00000000000 --- a/src/os_posix/os_stdio.c +++ /dev/null @@ -1,127 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_fopen -- - * Open a FILE handle. 
- */ -int -__wt_fopen(WT_SESSION_IMPL *session, - const char *name, WT_FHANDLE_MODE mode_flag, u_int flags, FILE **fpp) -{ - WT_DECL_RET; - const char *mode, *path; - char *pathbuf; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fopen", name)); - - pathbuf = NULL; - if (LF_ISSET(WT_FOPEN_FIXED)) - path = name; - else { - WT_RET(__wt_filename(session, name, &pathbuf)); - path = pathbuf; - } - - mode = NULL; - switch (mode_flag) { - case WT_FHANDLE_APPEND: - mode = WT_FOPEN_APPEND; - break; - case WT_FHANDLE_READ: - mode = WT_FOPEN_READ; - break; - case WT_FHANDLE_WRITE: - mode = WT_FOPEN_WRITE; - break; - } - *fpp = fopen(path, mode); - if (*fpp == NULL) - ret = __wt_errno(); - - if (pathbuf != NULL) - __wt_free(session, pathbuf); - - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: fopen", name); -} - -/* - * __wt_vfprintf -- - * Vfprintf for a FILE handle. - */ -int -__wt_vfprintf(FILE *fp, const char *fmt, va_list ap) -{ - return (vfprintf(fp, fmt, ap) < 0 ? __wt_errno() : 0); -} - -/* - * __wt_fprintf -- - * Fprintf for a FILE handle. - */ -int -__wt_fprintf(FILE *fp, const char *fmt, ...) - WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3))) -{ - WT_DECL_RET; - va_list ap; - - va_start(ap, fmt); - ret = __wt_vfprintf(fp, fmt, ap); - va_end(ap); - - return (ret); -} - -/* - * __wt_fflush -- - * Flush a FILE handle. - */ -int -__wt_fflush(FILE *fp) -{ - /* Flush the handle. */ - return (fflush(fp) == 0 ? 0 : __wt_errno()); -} - -/* - * __wt_fclose -- - * Close a FILE handle. - */ -int -__wt_fclose(FILE **fpp, WT_FHANDLE_MODE mode_flag) -{ - FILE *fp; - WT_DECL_RET; - - if (*fpp == NULL) - return (0); - - fp = *fpp; - *fpp = NULL; - - /* - * If the handle was opened for writing, flush the file to the backing - * OS buffers, then flush the OS buffers to the backing disk. 
- */ - if (mode_flag == WT_FHANDLE_APPEND || mode_flag == WT_FHANDLE_WRITE) { - ret = __wt_fflush(fp); - if (fsync(fileno(fp)) != 0) - WT_TRET(__wt_errno()); - } - - /* Close the handle. */ - if (fclose(fp) != 0) - WT_TRET(__wt_errno()); - - return (ret); -} diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 2a53ad58f52..3f7b34d132f 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -267,8 +267,9 @@ __wt_session_compact( session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, compact, config, cfg); + /* In-memory is already as compact as it's going to get. */ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - WT_ERR(ENOTSUP); + goto err; /* Disallow objects in the WiredTiger name space. */ WT_ERR(__wt_str_name_check(session, uri)); diff --git a/src/support/err.c b/src/support/err.c index 875bd3efcf3..040b1ef742f 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -16,12 +16,15 @@ static int __handle_error_default(WT_EVENT_HANDLER *handler, WT_SESSION *wt_session, int error, const char *errmsg) { + WT_SESSION_IMPL *session; + WT_UNUSED(handler); - WT_UNUSED(wt_session); WT_UNUSED(error); - WT_RET(__wt_fprintf(stderr, "%s\n", errmsg)); - WT_RET(__wt_fflush(stderr)); + session = (WT_SESSION_IMPL *)wt_session; + + WT_RET(__wt_fprintf(session, WT_STDERR, "%s\n", errmsg)); + WT_RET(__wt_fsync(session, WT_STDERR, true)); return (0); } @@ -33,11 +36,13 @@ static int __handle_message_default(WT_EVENT_HANDLER *handler, WT_SESSION *wt_session, const char *message) { + WT_SESSION_IMPL *session; + WT_UNUSED(handler); - WT_UNUSED(wt_session); - WT_RET(__wt_fprintf(stdout, "%s\n", message)); - WT_RET(__wt_fflush(stdout)); + session = (WT_SESSION_IMPL *)wt_session; + WT_RET(__wt_fprintf(session, WT_STDOUT, "%s\n", message)); + WT_RET(__wt_fsync(session, WT_STDOUT, true)); return (0); } @@ -175,13 +180,13 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * example, we can end up here 
without a session.) */ if (session == NULL) { - WT_RET(__wt_fprintf(stderr, + WT_RET(__wt_fprintf(session, WT_STDERR, "WiredTiger Error%s%s: ", error == 0 ? "" : ": ", error == 0 ? "" : __wt_strerror(session, error, NULL, 0))); - WT_RET(__wt_vfprintf(stderr, fmt, ap)); - WT_RET(__wt_fprintf(stderr, "\n")); - return (__wt_fflush(stderr)); + WT_RET(__wt_vfprintf(session, WT_STDERR, fmt, ap)); + WT_RET(__wt_fprintf(session, WT_STDERR, "\n")); + return (__wt_fsync(session, WT_STDERR, true)); } p = s; diff --git a/src/support/filename.c b/src/support/filename.c index 215f5b47997..ac0aee5686e 100644 --- a/src/support/filename.c +++ b/src/support/filename.c @@ -103,11 +103,11 @@ __wt_rename_and_sync_directory( } /* - * __wt_fh_sync_and_rename -- - * Sync and close a file, and swap it into place. + * __wt_sync_handle_and_rename -- + * Sync and close a handle, and swap it into place. */ int -__wt_fh_sync_and_rename( +__wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to) { WT_DECL_RET; @@ -117,28 +117,9 @@ __wt_fh_sync_and_rename( *fhp = NULL; /* Flush to disk and close the handle. */ - ret = __wt_fsync(session, fh); + ret = __wt_fsync(session, fh, true); WT_TRET(__wt_close(session, &fh)); WT_RET(ret); return (__wt_rename_and_sync_directory(session, from, to)); } - -/* - * __wt_sync_fp_and_rename -- - * Sync and close a file, and swap it into place. - */ -int -__wt_sync_fp_and_rename( - WT_SESSION_IMPL *session, FILE **fpp, const char *from, const char *to) -{ - FILE *fp; - - fp = *fpp; - *fpp = NULL; - - /* Flush to disk and close the handle. 
*/ - WT_RET(__wt_fclose(&fp, WT_FHANDLE_WRITE)); - - return (__wt_rename_and_sync_directory(session, from, to)); -} diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 4bb8ccdc6f0..6f0e475c277 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1200,7 +1200,7 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) /* Should have an underlying block manager reference. */ WT_ASSERT(session, bm != NULL); - return (bm->sync(bm, session, false)); + return (bm->sync(bm, session, true)); } /* diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 37a6e0b3711..347aa5f9dfb 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -10,7 +10,6 @@ /* Cookie passed to __txn_printlog. */ typedef struct { - FILE *out; uint32_t flags; } WT_TXN_PRINTLOG_ARGS; @@ -69,28 +68,27 @@ err: __wt_buf_free(session, &key); * Print a commit log record. */ static int -__txn_commit_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, - uint32_t flags) +__txn_commit_printlog(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { bool firstrecord; firstrecord = true; - WT_RET(__wt_fprintf(out, " \"ops\": [\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"ops\": [\n")); /* The logging subsystem zero-pads records. 
*/ while (*pp < end && **pp) { if (!firstrecord) - WT_RET(__wt_fprintf(out, ",\n")); - WT_RET(__wt_fprintf(out, " {")); + WT_RET(__wt_fprintf(session, WT_STDOUT, ",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, " {")); firstrecord = false; - WT_RET(__wt_txn_op_printlog(session, pp, end, out, flags)); - WT_RET(__wt_fprintf(out, "\n }")); + WT_RET(__wt_txn_op_printlog(session, pp, end, flags)); + WT_RET(__wt_fprintf(session, WT_STDOUT, "\n }")); } - WT_RET(__wt_fprintf(out, "\n ]\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, "\n ]\n")); return (0); } @@ -465,7 +463,6 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_ITEM *rawrec, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord) { - FILE *out; WT_LOG_RECORD *logrec; WT_TXN_PRINTLOG_ARGS *args; const uint8_t *end, *p; @@ -477,7 +474,6 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_UNUSED(next_lsnp); args = cookie; - out = args->out; p = WT_LOG_SKIP_HEADER(rawrec->data); end = (const uint8_t *)rawrec->data + rawrec->size; @@ -488,16 +484,16 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_RET(__wt_logrec_read(session, &p, end, &rectype)); if (!firstrecord) - WT_RET(__wt_fprintf(out, ",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, ",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, " { \"lsn\" : [%" PRIu32 ",%" PRIu32 "],\n", lsnp->l.file, lsnp->l.offset)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"hdr_flags\" : \"%s\",\n", compressed ? "compressed" : "")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"rec_len\" : %" PRIu32 ",\n", logrec->len)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"mem_len\" : %" PRIu32 ",\n", compressed ? 
logrec->mem_len : logrec->len)); @@ -505,40 +501,44 @@ __txn_printlog(WT_SESSION_IMPL *session, case WT_LOGREC_CHECKPOINT: WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset)); - WT_RET(__wt_fprintf(out, " \"type\" : \"checkpoint\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"type\" : \"checkpoint\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"ckpt_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n", lsnfile, lsnoffset)); break; case WT_LOGREC_COMMIT: WT_RET(__wt_vunpack_uint(&p, WT_PTRDIFF(end, p), &txnid)); - WT_RET(__wt_fprintf(out, " \"type\" : \"commit\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"type\" : \"commit\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"txnid\" : %" PRIu64 ",\n", txnid)); - WT_RET(__txn_commit_printlog(session, &p, end, out, - args->flags)); + WT_RET(__txn_commit_printlog(session, &p, end, args->flags)); break; case WT_LOGREC_FILE_SYNC: WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(Ii), &fileid, &start)); - WT_RET(__wt_fprintf(out, " \"type\" : \"file_sync\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"type\" : \"file_sync\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"fileid\" : %" PRIu32 ",\n", fileid)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT, " \"start\" : %" PRId32 "\n", start)); break; case WT_LOGREC_MESSAGE: WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(S), &msg)); - WT_RET(__wt_fprintf(out, " \"type\" : \"message\",\n")); - WT_RET(__wt_fprintf(out, " \"message\" : \"%s\"\n", msg)); + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"type\" : \"message\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, + " \"message\" : \"%s\"\n", msg)); break; } - WT_RET(__wt_fprintf(out, " }")); + WT_RET(__wt_fprintf(session, WT_STDOUT, " }")); return (0); } @@ -548,19 +548,18 @@ 
__txn_printlog(WT_SESSION_IMPL *session, * Print the log in a human-readable format. */ int -__wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags) +__wt_txn_printlog(WT_SESSION *wt_session, uint32_t flags) { WT_SESSION_IMPL *session; WT_TXN_PRINTLOG_ARGS args; session = (WT_SESSION_IMPL *)wt_session; - args.out = out; args.flags = flags; - WT_RET(__wt_fprintf(out, "[\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, "[\n")); WT_RET(__wt_log_scan( session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, &args)); - WT_RET(__wt_fprintf(out, "\n]\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT, "\n]\n")); return (0); } diff --git a/src/utilities/util_printlog.c b/src/utilities/util_printlog.c index 9a2bdc8a9ba..e7fa2134934 100644 --- a/src/utilities/util_printlog.c +++ b/src/utilities/util_printlog.c @@ -41,7 +41,7 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) if (argc != 0) return (usage()); - ret = __wt_txn_printlog(session, stdout, flags); + ret = __wt_txn_printlog(session, flags); if (ret != 0) { fprintf(stderr, "%s: printlog failed: %s\n", -- cgit v1.2.1 From 863766859d542f9e661b363e32abb6dfe37171fd Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 14:10:03 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Update the list of Windows build files. 
--- build_win/filelist.win | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/build_win/filelist.win b/build_win/filelist.win index b6a9caf4a74..206dbc9f397 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -105,18 +105,16 @@ src/os_posix/os_abort.c src/os_posix/os_alloc.c src/os_posix/os_getline.c src/os_posix/os_getopt.c +src/os_posix/os_init.c +src/os_posix/os_inmemory.c src/os_posix/os_mtx_rw.c -src/os_posix/os_stdio.c +src/os_posix/os_posix.c src/os_posix/os_strtouq.c src/os_win/os_dir.c src/os_win/os_dlopen.c src/os_win/os_errno.c -src/os_win/os_exist.c src/os_win/os_fallocate.c -src/os_win/os_filesize.c -src/os_win/os_flock.c src/os_win/os_fsync.c -src/os_win/os_ftruncate.c src/os_win/os_getenv.c src/os_win/os_map.c src/os_win/os_mtx_cond.c @@ -125,9 +123,6 @@ src/os_win/os_open.c src/os_win/os_pagesize.c src/os_win/os_path.c src/os_win/os_priv.c -src/os_win/os_remove.c -src/os_win/os_rename.c -src/os_win/os_rw.c src/os_win/os_sleep.c src/os_win/os_snprintf.c src/os_win/os_thread.c -- cgit v1.2.1 From 50f3d14f005bae32b0cbec8b9577b3a7bac4119f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 17:29:47 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Code to remove/create the working directory was checking the directory existed, before removing it, and using __wt_exist, which has changed. Remove the test, try and remove the directory if it exists or not. Rework the code to display the failing command in all cases, rename a few variables to match other parts of WiredTiger. --- test/utility/test_util.i | 47 ++++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/test/utility/test_util.i b/test/utility/test_util.i index c5cebadcb5c..f9b889a7610 100644 --- a/test/utility/test_util.i +++ b/test/utility/test_util.i @@ -101,13 +101,13 @@ testutil_die(int e, const char *fmt, ...) 
* Creates the full intended work directory in buffer. */ static inline void -testutil_work_dir_from_path(char *buffer, size_t inputSize, const char *dir) +testutil_work_dir_from_path(char *buffer, size_t len, const char *dir) { /* If no directory is provided, use the default. */ if (dir == NULL) dir = DEFAULT_DIR; - if (inputSize < strlen(dir) + 1) + if (len < strlen(dir) + 1) testutil_die(ENOMEM, "Not enough memory in buffer for directory %s", dir); @@ -116,55 +116,48 @@ testutil_work_dir_from_path(char *buffer, size_t inputSize, const char *dir) /* * testutil_clean_work_dir -- - * Remove any existing work directories, can optionally fail on error + * Remove the work directory. */ static inline void testutil_clean_work_dir(char *dir) { - size_t inputSize; + size_t len; int ret; - bool exist; - char *buffer; + char *buf; /* Additional bytes for the Windows rd command. */ - inputSize = strlen(dir) + sizeof(RM_COMMAND); - if ((buffer = malloc(inputSize)) == NULL) + len = strlen(dir) + strlen(RM_COMMAND) + 1; + if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); - snprintf(buffer, inputSize, "%s%s", RM_COMMAND, dir); + snprintf(buf, len, "%s%s", RM_COMMAND, dir); - exist = 0; - if ((ret = __wt_exist(NULL, dir, &exist)) != 0) - testutil_die(ret, - "Unable to check if directory exists"); - if (exist == 1 && (ret = system(buffer)) != 0) - testutil_die(ret, - "System call to remove directory failed"); - free(buffer); + if ((ret = system(buf)) != 0) + testutil_die(ret, "%s", buf); + free(buf); } /* * testutil_make_work_dir -- - * Delete the existing work directory if it exists, then create a new one. + * Delete the existing work directory, then create a new one. 
*/ static inline void testutil_make_work_dir(char *dir) { - size_t inputSize; + size_t len; int ret; - char *buffer; + char *buf; testutil_clean_work_dir(dir); /* Additional bytes for the mkdir command */ - inputSize = strlen(dir) + sizeof(MKDIR_COMMAND); - if ((buffer = malloc(inputSize)) == NULL) + len = strlen(dir) + strlen(MKDIR_COMMAND) + 1; + if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); /* mkdir shares syntax between Windows and Linux */ - snprintf(buffer, inputSize, "%s%s", MKDIR_COMMAND, dir); - if ((ret = system(buffer)) != 0) - testutil_die(ret, "directory create call of '%s%s' failed", - MKDIR_COMMAND, dir); - free(buffer); + snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir); + if ((ret = system(buf)) != 0) + testutil_die(ret, "%s", buf); + free(buf); } -- cgit v1.2.1 From c7e7d2763d7aa9ccd9d390a266681ed05426f925 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 17:31:46 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Lint, unused variable. --- src/os_posix/os_inmemory.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index 4b65daa23f3..ad8553574cd 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -212,8 +212,6 @@ static int __im_handle_open(WT_SESSION_IMPL *session, WT_FH *fh, const char *path, int dio_type, u_int flags) { - WT_IM *im; - WT_UNUSED(path); WT_UNUSED(dio_type); WT_UNUSED(flags); -- cgit v1.2.1 From 8a5478cf23bd25b7d151f35a808d2a7ace1cf37b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 17:32:15 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Lint, unused variable. 
--- src/os_posix/os_inmemory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index ad8553574cd..eacff9808fa 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -212,6 +212,7 @@ static int __im_handle_open(WT_SESSION_IMPL *session, WT_FH *fh, const char *path, int dio_type, u_int flags) { + WT_UNUSED(session); WT_UNUSED(path); WT_UNUSED(dio_type); WT_UNUSED(flags); -- cgit v1.2.1 From 97329dbe577192755dbff702179bb781071cade6 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 19:16:58 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Missed a call to __wt_fsync_async, replace with __wt_fsync(..., false). --- src/block/block_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/block/block_write.c b/src/block/block_write.c index 2f888b35320..49da084cee2 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -327,7 +327,7 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, (block->os_cache_dirty += align_size) > block->os_cache_dirty_max && __wt_session_can_wait(session)) { block->os_cache_dirty = 0; - WT_RET(__wt_fsync_async(session, fh)); + WT_RET(__wt_fsync(session, fh, false)); } #endif #ifdef HAVE_POSIX_FADVISE -- cgit v1.2.1 From 79fa34908c19d2733869930eb49d19391a07ab94 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 19:17:30 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Fix some compilation problems on Linux. 
--- src/os_posix/os_fsync.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/os_posix/os_fsync.c b/src/os_posix/os_fsync.c index 9a08a337215..457da0d3e0b 100644 --- a/src/os_posix/os_fsync.c +++ b/src/os_posix/os_fsync.c @@ -21,7 +21,7 @@ __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); #ifdef __linux__ - return (WT_JUMP(j_handle_sync, session, fh, true))); + return (WT_JUMP(j_handle_sync, session, fh, true)); #else WT_UNUSED(fh); return (0); @@ -35,14 +35,15 @@ __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) int __wt_directory_sync(WT_SESSION_IMPL *session, const char *path) { - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - #ifdef __linux__ WT_DECL_RET; - int fd, tret; const char *dir; char *copy; +#endif + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + +#ifdef __linux__ /* * POSIX 1003.1 does not require that fsync of a file handle ensures the * entry in the directory containing the file has also reached disk (and -- cgit v1.2.1 From 9c35479f976c78954e2295c1141cec89fd82886b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 19:27:53 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files old-style function definition --- examples/c/ex_event_handler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/c/ex_event_handler.c b/examples/c/ex_event_handler.c index ba6807cd56d..d1e08edb04d 100644 --- a/examples/c/ex_event_handler.c +++ b/examples/c/ex_event_handler.c @@ -90,7 +90,7 @@ handle_wiredtiger_message( /*! 
[Function event_handler] */ static int -config_event_handler() +config_event_handler(void) { WT_CONNECTION *conn; WT_SESSION *session; -- cgit v1.2.1 From c327f85218c72ae5c5a092c2a2523d47d8abbf57 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 18 Mar 2016 21:48:48 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Don't use "wait" as a variable name Use a size_t as the in-memory offset, not a wt_off_t Other minor type/variable name cleanups. --- src/block/block_mgr.c | 4 ++-- src/include/os.h | 6 +++--- src/lsm/lsm_tree.c | 4 ++-- src/os_posix/os_inmemory.c | 44 ++++++++++++++++++++++++-------------------- src/os_posix/os_posix.c | 15 ++++++++------- 5 files changed, 39 insertions(+), 34 deletions(-) diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index 7db552b307c..e16b8709d7d 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -411,9 +411,9 @@ __bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats) * Flush a file to disk. */ static int -__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool wait) +__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool block) { - return (__wt_fsync(session, bm->block->fh, wait)); + return (__wt_fsync(session, bm->block->fh, block)); } /* diff --git a/src/include/os.h b/src/include/os.h index 863143387b3..b3dd0df686d 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -95,7 +95,7 @@ struct __wt_fh { /* * Underlying in-memory handle support. */ - wt_off_t off; /* Read/write offset */ + size_t off; /* Read/write offset */ WT_ITEM buf; /* Data */ bool direct_io; /* O_DIRECT configured */ @@ -177,11 +177,11 @@ __wt_filesize_name( * POSIX fflush/fsync. 
*/ static inline int -__wt_fsync(WT_SESSION_IMPL *session, void *fh, bool wait) +__wt_fsync(WT_SESSION_IMPL *session, void *fh, bool block) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - return (WT_JUMP(j_handle_sync, session, fh, wait)); + return (WT_JUMP(j_handle_sync, session, fh, block)); } /* diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index b64b72256cc..25ff336dd6f 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -85,7 +85,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) * Close an LSM tree structure. */ static int -__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool wait) +__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool block) { WT_DECL_RET; int i; @@ -100,7 +100,7 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool wait) * access is not available. */ for (i = 0; - lsm_tree->queue_ref > 0 || (wait && lsm_tree->refcnt > 1); ++i) { + lsm_tree->queue_ref > 0 || (block && lsm_tree->refcnt > 1); ++i) { /* * Remove any work units from the manager queues. Do this step * repeatedly in case a work unit was in the process of being diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index eacff9808fa..fdc7be9dfc0 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -133,7 +133,7 @@ __im_file_size( __wt_spin_lock(session, &im->lock); if (__wt_handle_search(session, name, false, false, NULL, &fh)) { - *sizep = fh->buf.size; + *sizep = (wt_off_t)fh->buf.size; __wt_handle_search_unlock(session); } else ret = ENOENT; @@ -262,7 +262,7 @@ __im_handle_printf( __wt_spin_lock(session, &im->lock); /* Grow the handle's buffer as necessary. */ - WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)(fh->off + len))); + WT_ERR(__wt_buf_grow(session, &fh->buf, fh->off + len)); /* Copy the data into place and update the offset. 
*/ memcpy((uint8_t *)fh->buf.mem + fh->off, tmp->data, len); @@ -284,14 +284,16 @@ __im_handle_read( { WT_DECL_RET; WT_IM *im; + size_t off; im = __wt_process.inmemory; __wt_spin_lock(session, &im->lock); - if (offset < fh->buf.size) { - len = WT_MIN(len, (size_t)(fh->buf.size - offset)); - memcpy(buf, (uint8_t *)fh->buf.mem + offset, len); - fh->off = offset + len; + off = (size_t)offset; + if (off < fh->buf.size) { + len = WT_MIN(len, fh->buf.size - off); + memcpy(buf, (uint8_t *)fh->buf.mem + off, len); + fh->off = off + len; } else ret = WT_ERROR; @@ -299,9 +301,9 @@ __im_handle_read( if (ret == 0) return (0); WT_RET_MSG(session, WT_ERROR, - "%s read error: failed to read %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset); + "%s read error: failed to read %" WT_SIZET_FMT " bytes at " + "offset %" WT_SIZET_FMT, + fh->name, len, off); } /* @@ -313,7 +315,7 @@ __im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) { WT_UNUSED(session); - *sizep = fh->buf.size; + *sizep = (wt_off_t)fh->buf.size; return (0); } @@ -322,9 +324,9 @@ __im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) * POSIX fflush/fsync. */ static int -__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool wait) +__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { - WT_UNUSED(wait); + WT_UNUSED(block); /* Flush any stream's stdio buffers. 
*/ if (fh == WT_STDERR || fh == WT_STDOUT) { @@ -367,24 +369,26 @@ __im_handle_write(WT_SESSION_IMPL *session, { WT_DECL_RET; WT_IM *im; + size_t off; im = __wt_process.inmemory; __wt_spin_lock(session, &im->lock); - WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)(offset + len + 1024))); + off = (size_t)offset; + WT_ERR(__wt_buf_grow(session, &fh->buf, off + len + 1024)); - memcpy((uint8_t *)fh->buf.data + offset, buf, len); - if (offset + len > fh->buf.size) - fh->buf.size = (size_t)(offset + len); - fh->off = offset + len; + memcpy((uint8_t *)fh->buf.data + off, buf, len); + if (off + len > fh->buf.size) + fh->buf.size = off + len; + fh->off = off + len; err: __wt_spin_unlock(session, &im->lock); if (ret == 0) return (0); WT_RET_MSG(session, ret, - "%s write error: failed to write %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset); + "%s write error: failed to write %" WT_SIZET_FMT " bytes at " + "offset %" WT_SIZET_FMT, + fh->name, len, off); } /* diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index e3b39d69363..c348f5cdcf1 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -13,12 +13,12 @@ * Underlying support function to flush a file handle. 
*/ static int -__posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, bool wait) +__posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, bool block) { WT_DECL_RET; #ifdef HAVE_SYNC_FILE_RANGE - if (!wait) { + if (!block) { WT_SYSCALL_RETRY(sync_file_range(fd, (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); if (ret == 0) @@ -26,7 +26,7 @@ __posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, bool wait) WT_RET_MSG(session, ret, "%s: sync_file_range", name); } #else - if (!wait) + if (!block) return (0); #endif @@ -86,7 +86,7 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) WT_SYSCALL_RETRY(close(fd), tret); if (tret != 0) - __wt_err(session, tret, "%s: fsync", name); + __wt_err(session, tret, "%s: fsync", path); return (ret == 0 ? tret : ret); #else WT_UNUSED(session); @@ -328,7 +328,8 @@ __posix_handle_open(WT_SESSION_IMPL *session, mode_t mode; int f, fd, tret; bool direct_io; - char *path, *stream_mode; + char *path; + const char *stream_mode; conn = S2C(session); direct_io = false; @@ -550,7 +551,7 @@ __posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) * POSIX fflush/fsync. */ static int -__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool wait) +__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { /* Flush any stream's stdio buffers. */ if (fh == WT_STDERR || fh == WT_STDOUT) { @@ -561,7 +562,7 @@ __posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool wait) } if (fh->fp == NULL) - return (__posix_sync(session, fh->fd, fh->name, wait)); + return (__posix_sync(session, fh->fd, fh->name, block)); if (fflush(fh->fp) == 0) return (0); -- cgit v1.2.1 From 5a6d06b712eff6be8a77c7944636e98b3ccab1fa Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 19 Mar 2016 10:06:56 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Push the rest of the __linux__ specific code for fsync down into the POSIX module. 
--- build_win/filelist.win | 1 - dist/filelist | 1 - src/include/extern.h | 2 -- src/include/os.h | 32 +++++++++++++++++++++ src/os_posix/os_fsync.c | 75 ------------------------------------------------- src/os_posix/os_posix.c | 21 ++++++++++++++ 6 files changed, 53 insertions(+), 79 deletions(-) delete mode 100644 src/os_posix/os_fsync.c diff --git a/build_win/filelist.win b/build_win/filelist.win index 206dbc9f397..4b3847f5f35 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -114,7 +114,6 @@ src/os_win/os_dir.c src/os_win/os_dlopen.c src/os_win/os_errno.c src/os_win/os_fallocate.c -src/os_win/os_fsync.c src/os_win/os_getenv.c src/os_win/os_map.c src/os_win/os_mtx_cond.c diff --git a/dist/filelist b/dist/filelist index 5088913d6fb..776bc3721f4 100644 --- a/dist/filelist +++ b/dist/filelist @@ -107,7 +107,6 @@ src/os_posix/os_dir.c src/os_posix/os_dlopen.c src/os_posix/os_errno.c src/os_posix/os_fallocate.c -src/os_posix/os_fsync.c src/os_posix/os_getenv.c src/os_posix/os_getline.c src/os_posix/os_getopt.c diff --git a/src/include/extern.h b/src/include/extern.h index 0216b6e62cc..b4819110c52 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -501,8 +501,6 @@ extern int __wt_map_error_rdonly(int error); extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh); extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); -extern int __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_directory_sync(WT_SESSION_IMPL *session, const char *path); extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); diff --git a/src/include/os.h b/src/include/os.h index 
b3dd0df686d..fcb4d3d60b7 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -116,6 +116,38 @@ struct __wt_fh { /* * OS calls that are currently just stubs. */ +/* + * __wt_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static inline int +__wt_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (WT_JUMP(j_directory_sync, session, path)); +} + +/* + * __wt_directory_sync_fh -- + * Flush a directory file handle to ensure file creation is durable. + * + * We don't use the normal sync path because many file systems don't require + * this step and we don't want to penalize them. + */ +static inline int +__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + +#ifdef __linux__ + return (WT_JUMP(j_handle_sync, session, fh, true)); +#else + WT_UNUSED(fh); + return (0); +#endif +} + /* * __wt_exist -- * Return if the file exists. diff --git a/src/os_posix/os_fsync.c b/src/os_posix/os_fsync.c deleted file mode 100644 index 457da0d3e0b..00000000000 --- a/src/os_posix/os_fsync.c +++ /dev/null @@ -1,75 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_directory_sync_fh -- - * Flush a directory file handle to ensure file creation is durable. - * - * We don't use fsync because most file systems don't require this step and - * we don't want to penalize them by calling fsync. 
- */ -int -__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - -#ifdef __linux__ - return (WT_JUMP(j_handle_sync, session, fh, true)); -#else - WT_UNUSED(fh); - return (0); -#endif -} - -/* - * __wt_directory_sync -- - * Flush a directory to ensure file creation is durable. - */ -int -__wt_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ -#ifdef __linux__ - WT_DECL_RET; - const char *dir; - char *copy; -#endif - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - -#ifdef __linux__ - /* - * POSIX 1003.1 does not require that fsync of a file handle ensures the - * entry in the directory containing the file has also reached disk (and - * there are historic Linux filesystems requiring this), do an explicit - * fsync on a file descriptor for the directory to be sure. - */ - copy = NULL; - if (path == NULL || (dir = strrchr(path, '/')) == NULL) - path = S2C(session)->home; - else { - /* - * Copy the directory name, leaving the trailing slash in place, - * so a path of "/foo" doesn't result in an empty string. - */ - WT_RET(__wt_strndup( - session, path, (size_t)(dir - path) + 1, &copy)); - path = copy; - } - - ret = WT_JUMP(j_directory_sync, session, path); - - __wt_free(session, copy); - - return (ret); -#else - WT_UNUSED(path); - return (0); -#endif -} diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index c348f5cdcf1..b6b2ca4c582 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -76,6 +76,27 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) #ifdef __linux__ WT_DECL_RET; int fd, tret; + const char *dir; + char *copy; + + /* + * POSIX 1003.1 does not require that fsync of a file handle ensures the + * entry in the directory containing the file has also reached disk (and + * there are historic Linux filesystems requiring this), do an explicit + * fsync on a file descriptor for the directory to be sure.
+ */ + copy = NULL; + if (path == NULL || (dir = strrchr(path, '/')) == NULL) + path = S2C(session)->home; + else { + /* + * Copy the directory name, leaving the trailing slash in place, + * so a path of "/foo" doesn't result in an empty string. + */ + WT_RET(__wt_strndup( + session, path, (size_t)(dir - path) + 1, &copy)); + path = copy; + } WT_SYSCALL_RETRY(( (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); -- cgit v1.2.1 From 08e11cc4822d97f9cf0c22ae9b3f6b2a1718ac0a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 19 Mar 2016 10:32:32 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Don't create/sweep/whatever the LAS table when running in-memory. --- src/cache/cache_las.c | 4 +++- src/conn/conn_sweep.c | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index 8796ec6b2fc..fd541458fa8 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -58,8 +58,10 @@ __wt_las_create(WT_SESSION_IMPL *session) conn = S2C(session); - if (F_ISSET(conn, WT_CONN_READONLY)) + /* Read-only and in-memory configurations don't need the LAS table. */ + if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) return (0); + /* * Done at startup: we cannot do it on demand because we require the * schema lock to create and drop the table, and it may not always be diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index cc0aa5a1322..5d24ea61607 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -369,8 +369,9 @@ __wt_sweep_create(WT_SESSION_IMPL *session) * * Don't tap the sweep thread for eviction.
*/ - session_flags = WT_SESSION_CAN_WAIT | - WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION; + session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_NO_EVICTION; + if (F_ISSET(conn, WT_CONN_LAS_OPEN)) + session_flags |= WT_SESSION_LOOKASIDE_CURSOR; WT_RET(__wt_open_internal_session( conn, "sweep-server", true, session_flags, &conn->sweep_session)); session = conn->sweep_session; -- cgit v1.2.1 From bd445d59099c69d433432106067690f4ab50171f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 19 Mar 2016 12:28:12 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Check the wiredtiger_open() config string and the WIREDTIGER_CONFIG environment variable for read-only and in-memory configuration before looking at anything else. --- src/conn/conn_api.c | 83 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index c8a4c96d7a5..e5187c2a046 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1940,52 +1940,64 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, /* Remaining basic initialization of the connection structure. */ WT_ERR(__wt_connection_init(conn)); - /* Check/set the application-specified configuration string. */ + /* Check the application-specified configuration string. */ WT_ERR(__wt_config_check(session, WT_CONFIG_REF(session, wiredtiger_open), config, 0)); - cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open); - cfg[1] = config; - - /* Capture the config_base setting file for later use. */ - WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval)); - config_base_set = cval.val != 0; - - /* Configure error messages so we get them right early. 
*/ - WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); - if (cval.len != 0) - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &conn->error_prefix)); /* - * Look for read-only early (for example, it configures use of the base - * config file). + * Build the temporary, initial configuration stack, in the following + * order (where later entries override earlier entries): + * + * 1. the base configuration for the wiredtiger_open call + * 2. the config passed in by the application + * 3. environment variable settings (optional) * - * XXX - * We haven't read the WIREDTIGER_CONFIG environment variable, we need - * to fix that. + * In other words, a configuration stack based on the application's + * passed-in information and nothing else. */ - WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval)); - if (cval.val) - F_SET(conn, WT_CONN_READONLY); + cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open); + cfg[1] = config; + WT_ERR(__wt_scr_alloc(session, 0, &i1)); + WT_ERR(__conn_config_env(session, cfg, i1)); /* - * Look for in-memory early (for example, it configures writing the base - * config file). + * We need to know if configured for read-only or in-memory behavior + * before reading/writing the filesystem. The only way the application + * can configure that before we touch the filesystem is the wiredtiger + * config string or the WIREDTIGER_CONFIG environment variable. * - * XXX - * We haven't read the WIREDTIGER_CONFIG environment variable, we need - * to fix that. + * The environment isn't trusted by default, for security reasons; if + * the application wants us to trust the environment before reading + * the filesystem, the wiredtiger_open config string is the only way. 
*/ WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval)); if (cval.val != 0) F_SET(conn, WT_CONN_IN_MEMORY); + WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval)); + if (cval.val) + F_SET(conn, WT_CONN_READONLY); /* - * After checking readonly and in-memory, but before we do anything - * that touches an underlying filesystem, configure the OS layer. + * After checking readonly and in-memory, but before we do anything that + * touches the filesystem, configure the OS layer. */ WT_ERR(__wt_os_init(session)); + /* + * Capture the config_base setting file for later use. Again, if the + * application doesn't want us to read the base configuration file, + * the WIREDTIGER_CONFIG environment variable or the wiredtiger_open + * config string are the only ways. + */ + WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval)); + config_base_set = cval.val != 0; + + /* Configure error messages so we get them right early. */ + WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); + if (cval.len != 0) + WT_ERR(__wt_strndup( + session, cval.str, cval.len, &conn->error_prefix)); + /* Get the database home. */ WT_ERR(__conn_home(session, home, cfg)); @@ -1993,8 +2005,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__conn_single(session, cfg)); /* - * Build the configuration stack, in the following order (where later - * entries override earlier entries): + * Build the real configuration stack, in the following order (where + * later entries override earlier entries): * * 1. all possible wiredtiger_open configurations * 2. the WiredTiger compilation version (expected to be overridden by @@ -2008,7 +2020,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, * Clear the entries we added to the stack, we're going to build it in * order. 
*/ - WT_ERR(__wt_scr_alloc(session, 0, &i1)); WT_ERR(__wt_scr_alloc(session, 0, &i2)); WT_ERR(__wt_scr_alloc(session, 0, &i3)); cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open_all); @@ -2031,11 +2042,15 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, * Merge the full configuration stack and save it for reconfiguration. */ WT_ERR(__wt_config_merge(session, cfg, NULL, &merge_cfg)); + /* - * The read-only setting may have been set in a configuration file. - * Get it again so that we can override other configuration settings - * before they are processed by the subsystems. + * Read-only and in-memory settings may have been set in a configuration + * file (not optimal, but we can handle it). Get those settings again so + * we can override other configuration settings as they are processed. */ + WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval)); + if (cval.val != 0) + F_SET(conn, WT_CONN_IN_MEMORY); WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval)); if (cval.val) F_SET(conn, WT_CONN_READONLY); -- cgit v1.2.1 From 174db9f11f03309dc560c0e11d60f226db6cb197 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 20 Mar 2016 16:32:05 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Move file based APIs into the WT_CONNECTION_IMPL structure, move file-handle based APIs into the WT_FH structure. Create real WT_FH structures for stdout/stderr instead of using fake pointer values to flag them, give them their own functions set up when the WT_CONNECTION_IMPL structure is first created. Move file based and file-handle based API stubs into misc.i for now, not sure where they'll end up, or if we'll push WT_FH methods up into the WiredTiger code. 
--- build_win/filelist.win | 1 + dist/filelist | 1 + dist/log.py | 4 +- src/btree/bt_debug.c | 2 +- src/conn/conn_api.c | 6 +- src/conn/conn_handle.c | 3 + src/evict/evict_lru.c | 4 +- src/include/connection.h | 48 +++---- src/include/extern.h | 1 + src/include/misc.i | 194 ++++++++++++++++++++++++++++ src/include/os.h | 218 +++---------------------------- src/include/wt_internal.h | 2 +- src/log/log_auto.c | 58 ++++----- src/os_posix/os_getline.c | 2 +- src/os_posix/os_inmemory.c | 112 ++++++++-------- src/os_posix/os_open.c | 12 +- src/os_posix/os_posix.c | 315 ++++++++++++++++++++++----------------------- src/os_posix/os_stdio.c | 167 ++++++++++++++++++++++++ src/support/err.c | 16 +-- src/txn/txn_log.c | 45 +++---- 20 files changed, 678 insertions(+), 533 deletions(-) create mode 100644 src/os_posix/os_stdio.c diff --git a/build_win/filelist.win b/build_win/filelist.win index 4b3847f5f35..df1efc23830 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -109,6 +109,7 @@ src/os_posix/os_init.c src/os_posix/os_inmemory.c src/os_posix/os_mtx_rw.c src/os_posix/os_posix.c +src/os_posix/os_stdio.c src/os_posix/os_strtouq.c src/os_win/os_dir.c src/os_win/os_dlopen.c diff --git a/dist/filelist b/dist/filelist index 776bc3721f4..e1595cbb5d8 100644 --- a/dist/filelist +++ b/dist/filelist @@ -122,6 +122,7 @@ src/os_posix/os_path.c src/os_posix/os_posix.c src/os_posix/os_priv.c src/os_posix/os_sleep.c +src/os_posix/os_stdio.c src/os_posix/os_strtouq.c src/os_posix/os_thread.c src/os_posix/os_time.c diff --git a/dist/log.py b/dist/log.py index ad93a6dfb44..9201b20054b 100644 --- a/dist/log.py +++ b/dist/log.py @@ -89,7 +89,7 @@ def printf_line(f, optype, i, ishex): ifbegin = 'if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {' + nl_indent if postcomma == '': precomma = ',\\n' - body = '%s%s(__wt_fprintf(session, WT_STDOUT,' % ( + body = '%s%s(__wt_fprintf(session, WT_STDOUT(session),' % ( printf_setup(f, ishex, nl_indent), 'WT_ERR' if has_escape(optype.fields) else 
'WT_RET') + \ '%s "%s \\"%s\\": \\"%s\\"%s",%s));' % ( @@ -300,7 +300,7 @@ __wt_logop_%(name)s_print(WT_SESSION_IMPL *session, \t%(arg_unused)s%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack( \t session, pp, end%(arg_addrs)s)); -\tWT_RET(__wt_fprintf(session, WT_STDOUT, +\tWT_RET(__wt_fprintf(session, WT_STDOUT(session), \t " \\"optype\\": \\"%(name)s\\",\\n")); \t%(print_args)s %(arg_fini)s diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 5c91e89e033..6a36912612f 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -201,7 +201,7 @@ __wt_debug_addr_print( WT_DECL_RET; WT_RET(__wt_scr_alloc(session, 128, &buf)); - ret = __wt_fprintf(session, WT_STDERR, + ret = __wt_fprintf(session, WT_STDERR(session), "%s\n", __wt_addr_string(session, addr, addr_size, buf)); __wt_scr_free(session, &buf); diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index e5187c2a046..fe6a782d08a 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1934,10 +1934,12 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, session = conn->default_session = &conn->dummy_session; session->iface.connection = &conn->iface; session->name = "wiredtiger_open"; - __wt_random_init(&session->rnd); + + /* Do standard I/O and error handling first. */ + WT_ERR(__wt_os_stdio(session)); __wt_event_handler_set(session, event_handler); - /* Remaining basic initialization of the connection structure. */ + /* Basic initialization of the connection structure. */ WT_ERR(__wt_connection_init(conn)); /* Check the application-specified configuration string. */ diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index eb89949f79e..5f4c38e7361 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -41,6 +41,9 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) TAILQ_INIT(&conn->lsm_manager.appqh); TAILQ_INIT(&conn->lsm_manager.managerqh); + /* Random numbers. */ + __wt_random_init(&session->rnd); + /* Configuration. 
*/ WT_RET(__wt_conn_config_init(session)); diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 061db40ccf0..d3e32d7fc23 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1702,7 +1702,7 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) total_bytes = 0; if (ofile == NULL) - fh = WT_STDERR; + fh = WT_STDERR(session); else WT_RET(__wt_open(session, ofile, WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_STREAM_WRITE, &fh)); @@ -1785,7 +1785,7 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) "MB vs tracked inuse %" PRIu64 "MB\n", total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20); (void)__wt_fprintf(session, fh, "==========\n"); - if (fh != WT_STDERR) + if (ofile != NULL) WT_RET(__wt_close(session, &fh)); return (0); } diff --git a/src/include/connection.h b/src/include/connection.h index 7e74603d650..c578ecf0fb8 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -25,36 +25,6 @@ struct __wt_process { /* Locked: connection queue */ TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh; WT_CACHE_POOL *cache_pool; - - void *inmemory; /* In-memory configuration cookie */ - - /* - * OS library/system call jump table, to support in-memory and readonly - * configurations as well as special devices with other non-POSIX APIs. - */ -#define WT_JUMP(func, ...) 
__wt_process.func(__VA_ARGS__) - int (*j_directory_sync)(WT_SESSION_IMPL *, const char *path); - int (*j_file_exist)(WT_SESSION_IMPL *, const char *, bool *); - int (*j_file_remove)(WT_SESSION_IMPL *, const char *); - int (*j_file_rename)(WT_SESSION_IMPL *, const char *, const char *); - int (*j_file_size)( - WT_SESSION_IMPL *, const char *, bool, wt_off_t *); - int (*j_handle_advise)( - WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t, int); - int (*j_handle_close)(WT_SESSION_IMPL *, WT_FH *); - int (*j_handle_getc)(WT_SESSION_IMPL *, WT_FH *, int *); - int (*j_handle_lock)(WT_SESSION_IMPL *, WT_FH *, bool); - int (*j_handle_open)( - WT_SESSION_IMPL *, WT_FH *, const char *, int, u_int); - int (*j_handle_printf)( - WT_SESSION_IMPL *, WT_FH *, const char *, va_list); - int (*j_handle_read)( - WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *); - int (*j_handle_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); - int (*j_handle_sync)(WT_SESSION_IMPL *, WT_FH *, bool); - int (*j_handle_truncate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t); - int (*j_handle_write)( - WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, const void *); }; extern WT_PROCESS __wt_process; @@ -451,5 +421,23 @@ struct __wt_connection_impl { int page_size; /* OS page size for mmap alignment */ uint32_t verbose; + void *inmemory; /* In-memory configuration cookie */ + +#define WT_STDERR(s) (&S2C(s)->wt_stderr) +#define WT_STDOUT(s) (&S2C(s)->wt_stdout) + WT_FH wt_stderr, wt_stdout; + + /* + * OS library/system call jump table, to support in-memory and readonly + * configurations as well as special devices with other non-POSIX APIs. 
+ */ + int (*file_directory_sync)(WT_SESSION_IMPL *, const char *path); + int (*file_exist)(WT_SESSION_IMPL *, const char *, bool *); + int (*file_remove)(WT_SESSION_IMPL *, const char *); + int (*file_rename)(WT_SESSION_IMPL *, const char *, const char *); + int (*file_size)(WT_SESSION_IMPL *, const char *, bool, wt_off_t *); + int (*handle_open)( + WT_SESSION_IMPL *, WT_FH *, const char *, int, u_int); + uint32_t flags; }; diff --git a/src/include/extern.h b/src/include/extern.h index b4819110c52..8c88b578591 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -537,6 +537,7 @@ extern int __wt_os_posix(WT_SESSION_IMPL *session); extern int __wt_os_posix_cleanup(WT_SESSION_IMPL *session); extern bool __wt_has_priv(void); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern int __wt_os_stdio(WT_SESSION_IMPL *session); extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); diff --git a/src/include/misc.i b/src/include/misc.i index 04376441340..b3d4f43a30c 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -70,3 +70,197 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...) return (0); #endif } + +/* + * __wt_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static inline int +__wt_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (S2C(session)->file_directory_sync(session, path)); +} + +/* + * __wt_directory_sync_fh -- + * Flush a directory file handle to ensure file creation is durable. + * + * We don't use the normal sync path because many file systems don't require + * this step and we don't want to penalize them. 
+ */ +static inline int +__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + +#ifdef __linux__ + return (fh->handle_sync(session, fh, true)); +#else + WT_UNUSED(fh); + return (0); +#endif +} + +/* + * __wt_exist -- + * Return if the file exists. + */ +static inline int +__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + return (S2C(session)->file_exist(session, name, existp)); +} + +/* + * __wt_posix_fadvise -- + * POSIX fadvise. + */ +static inline int +__wt_posix_fadvise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ +#if defined(HAVE_POSIX_FADVISE) + return (fh->fh_advise(session, fh, offset, len, advice)); +#else + return (0); +#endif +} + +/* + * __wt_file_lock -- + * Lock/unlock a file. + */ +static inline int +__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) +{ + return (fh->fh_lock(session, fh, lock)); +} + +/* + * __wt_filesize -- + * Get the size of a file in bytes, by file handle. + */ +static inline int +__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + return (fh->fh_size(session, fh, sizep)); +} + +/* + * __wt_filesize_name -- + * Get the size of a file in bytes, by file name. + */ +static inline int +__wt_filesize_name( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + return (S2C(session)->file_size(session, name, silent, sizep)); +} + +/* + * __wt_fsync -- + * POSIX fflush/fsync. + */ +static inline int +__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (fh->fh_sync(session, fh, block)); +} + +/* + * __wt_ftruncate -- + * POSIX ftruncate. 
+ */ +static inline int +__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (fh->fh_truncate(session, fh, len)); +} + +/* + * __wt_read -- + * POSIX pread. + */ +static inline int +__wt_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + WT_STAT_FAST_CONN_INCR(session, read_io); + + return (fh->fh_read(session, fh, offset, len, buf)); +} + +/* + * __wt_remove -- + * POSIX remove. + */ +static inline int +__wt_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (S2C(session)->file_remove(session, name)); +} + +/* + * __wt_rename -- + * POSIX rename. + */ +static inline int +__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (S2C(session)->file_rename(session, from, to)); +} + +/* + * __wt_write -- + * POSIX pwrite. + */ +static inline int +__wt_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || + WT_STRING_MATCH(fh->name, + WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); + + WT_STAT_FAST_CONN_INCR(session, write_io); + + return (fh->fh_write(session, fh, offset, len, buf)); +} + +/* + * __wt_vfprintf -- + * ANSI C vfprintf. + */ +static inline int +__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + return (fh->fh_printf(session, fh, fmt, ap)); +} + +/* + * __wt_fprintf -- + * ANSI C fprintf. + */ +static inline int +__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...) 
+ WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) +{ + WT_DECL_RET; + va_list ap; + + va_start(ap, fmt); + ret = __wt_vfprintf(session, fh, fmt, ap); + va_end(ap); + + return (ret); +} diff --git a/src/include/os.h b/src/include/os.h index fcb4d3d60b7..2e2f63da6ac 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -66,11 +66,8 @@ #define WT_STREAM_READ 0x020 /* Open a stream: read */ #define WT_STREAM_WRITE 0x040 /* Open a stream: write */ -#define WT_STDERR ((void *)0x1) /* WT_FH to stderr */ -#define WT_STDOUT ((void *)0x2) /* WT_FH to stdout */ - struct __wt_fh { - char *name; /* File name */ + const char *name; /* File name */ uint64_t name_hash; /* Hash of name */ TAILQ_ENTRY(__wt_fh) q; /* List of open handles */ TAILQ_ENTRY(__wt_fh) hashq; /* Hashed list of handles */ @@ -108,204 +105,19 @@ struct __wt_fh { WT_FALLOCATE_SYS } fallocate_available; bool fallocate_requires_locking; -#define WT_FH_IN_MEMORY 0x01 /* In-memory, don't remove */ -#define WT_FH_FLUSH_ON_CLOSE 0x02 /* Flush when closing */ +#define WT_FH_FLUSH_ON_CLOSE 0x01 /* Flush when closing */ +#define WT_FH_IN_MEMORY 0x02 /* In-memory, don't remove */ uint32_t flags; -}; -/* - * OS calls that are currently just stubs. - */ -/* - * __wt_directory_sync -- - * Flush a directory to ensure file creation is durable. - */ -static inline int -__wt_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - return (WT_JUMP(j_directory_sync, session, path)); -} - -/* - * __wt_directory_sync_fh -- - * Flush a directory file handle to ensure file creation is durable. - * - * We don't use the normal sync path because many file systems don't require - * this step and we don't want to penalize them. 
- */ -static inline int -__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - -#ifdef __linux__ - return (WT_JUMP(j_handle_sync, session, fh, true)); -#else - WT_UNUSED(fh); - return (0); -#endif -} - -/* - * __wt_exist -- - * Return if the file exists. - */ -static inline int -__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) -{ - return (WT_JUMP(j_file_exist, session, name, existp)); -} - -/* - * __wt_posix_fadvise -- - * POSIX fadvise. - */ -static inline int -__wt_posix_fadvise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) -{ -#if defined(HAVE_POSIX_FADVISE) - return (WT_JUMP(j_handle_advise, session, fh, offset, len, advice)); -#else - return (0); -#endif -} - -/* - * __wt_file_lock -- - * Lock/unlock a file. - */ -static inline int -__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) -{ - return (WT_JUMP(j_handle_lock, session, fh, lock)); -} - -/* - * __wt_filesize -- - * Get the size of a file in bytes, by file handle. - */ -static inline int -__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - return (WT_JUMP(j_handle_size, session, fh, sizep)); -} - -/* - * __wt_filesize_name -- - * Get the size of a file in bytes, by file name. - */ -static inline int -__wt_filesize_name( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) -{ - return (WT_JUMP(j_file_size, session, name, silent, sizep)); -} - -/* - * __wt_fsync -- - * POSIX fflush/fsync. - */ -static inline int -__wt_fsync(WT_SESSION_IMPL *session, void *fh, bool block) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - return (WT_JUMP(j_handle_sync, session, fh, block)); -} - -/* - * __wt_ftruncate -- - * POSIX ftruncate. 
- */ -static inline int -__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - return (WT_JUMP(j_handle_truncate, session, fh, len)); -} - -/* - * __wt_read -- - * POSIX pread. - */ -static inline int -__wt_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - WT_STAT_FAST_CONN_INCR(session, read_io); - - return (WT_JUMP(j_handle_read, session, fh, offset, len, buf)); -} - -/* - * __wt_remove -- - * POSIX remove. - */ -static inline int -__wt_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - return (WT_JUMP(j_file_remove, session, name)); -} - -/* - * __wt_rename -- - * POSIX rename. - */ -static inline int -__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - return (WT_JUMP(j_file_rename, session, from, to)); -} - -/* - * __wt_write -- - * POSIX pwrite. - */ -static inline int -__wt_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, - WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); - - WT_STAT_FAST_CONN_INCR(session, write_io); - - return (WT_JUMP(j_handle_write, session, fh, offset, len, buf)); -} - -/* - * __wt_vfprintf -- - * ANSI C vfprintf. - */ -static inline int -__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - return (WT_JUMP(j_handle_printf, session, fh, fmt, ap)); -} - -/* - * __wt_fprintf -- - * ANSI C fprintf. - */ -static inline int -__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...) 
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) -{ - WT_DECL_RET; - va_list ap; - - va_start(ap, fmt); - ret = __wt_vfprintf(session, fh, fmt, ap); - va_end(ap); - - return (ret); -} + int (*fh_advise)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t, int); + int (*fh_close)(WT_SESSION_IMPL *, WT_FH *); + int (*fh_getc)(WT_SESSION_IMPL *, WT_FH *, int *); + int (*fh_lock)(WT_SESSION_IMPL *, WT_FH *, bool); + int (*fh_printf)(WT_SESSION_IMPL *, WT_FH *, const char *, va_list); + int (*fh_read)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *); + int (*fh_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); + int (*fh_sync)(WT_SESSION_IMPL *, WT_FH *, bool); + int (*fh_truncate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t); + int (*fh_write)( + WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, const void *); +}; diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 2975920eb16..9e5007b38ed 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -339,6 +339,7 @@ union __wt_rand_state; #include "log.h" #include "lsm.h" #include "meta.h" +#include "os.h" #include "schema.h" #include "txn.h" @@ -356,7 +357,6 @@ union __wt_rand_state; #include "log.i" #include "misc.i" #include "mutex.i" /* required by btree.i */ -#include "os.h" /* requires connection.h */ #include "packing.i" #include "txn.i" /* required by btree.i */ diff --git a/src/log/log_auto.c b/src/log/log_auto.c index 1feace20e44..d4dab4e1a33 100644 --- a/src/log/log_auto.c +++ b/src/log/log_auto.c @@ -144,18 +144,18 @@ __wt_logop_col_put_print(WT_SESSION_IMPL *session, WT_RET(__wt_logop_col_put_unpack( session, pp, end, &fileid, &recno, &value)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"col_put\",\n")); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, 
WT_STDOUT(session), " \"recno\": \"%" PRIu64 "\",\n", recno)); WT_ERR(__logrec_make_json_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"value\": \"%s\"", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), ",\n \"value-hex\": \"%s\"", escaped)); } @@ -214,11 +214,11 @@ __wt_logop_col_remove_print(WT_SESSION_IMPL *session, WT_RET(__wt_logop_col_remove_unpack( session, pp, end, &fileid, &recno)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"col_remove\",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"recno\": \"%" PRIu64 "\"", recno)); return (0); } @@ -275,13 +275,13 @@ __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, WT_RET(__wt_logop_col_truncate_unpack( session, pp, end, &fileid, &start, &stop)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"col_truncate\",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"start\": \"%" PRIu64 "\",\n", start)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"stop\": \"%" PRIu64 "\"", stop)); return (0); } @@ -340,24 +340,24 @@ __wt_logop_row_put_print(WT_SESSION_IMPL *session, WT_RET(__wt_logop_row_put_unpack( session, pp, end, &fileid, &key, &value)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " 
\"optype\": \"row_put\",\n")); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"key\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"key-hex\": \"%s\",\n", escaped)); } WT_ERR(__logrec_make_json_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"value\": \"%s\"", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), ",\n \"value-hex\": \"%s\"", escaped)); } @@ -418,16 +418,16 @@ __wt_logop_row_remove_print(WT_SESSION_IMPL *session, WT_RET(__wt_logop_row_remove_unpack( session, pp, end, &fileid, &key)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"row_remove\",\n")); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"key\": \"%s\"", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), ",\n \"key-hex\": \"%s\"", escaped)); } @@ -490,27 +490,27 @@ __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, WT_RET(__wt_logop_row_truncate_unpack( session, pp, end, &fileid, &start, &stop, &mode)); - 
WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"row_truncate\",\n")); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &start)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"start\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &start)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"start-hex\": \"%s\",\n", escaped)); } WT_ERR(__logrec_make_json_str(session, &escaped, &stop)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"stop\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &stop)); - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"stop-hex\": \"%s\",\n", escaped)); } - WT_ERR(__wt_fprintf(session, WT_STDOUT, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"mode\": \"%" PRIu32 "\"", mode)); err: __wt_free(session, escaped); diff --git a/src/os_posix/os_getline.c b/src/os_posix/os_getline.c index 3542ef4e76e..01e11581edf 100644 --- a/src/os_posix/os_getline.c +++ b/src/os_posix/os_getline.c @@ -31,7 +31,7 @@ __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh) WT_RET(__wt_buf_init(session, buf, 100)); for (;;) { - WT_RET(WT_JUMP(j_handle_getc, session, fh, &c)); + WT_RET(fh->fh_getc(session, fh, &c)); if (c == EOF) break; diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index fdc7be9dfc0..a1faaec2e45 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -129,7 +129,7 @@ __im_file_size( WT_UNUSED(silent); - im = __wt_process.inmemory; + im = S2C(session)->inmemory; __wt_spin_lock(session, 
&im->lock); if (__wt_handle_search(session, name, false, false, NULL, &fh)) { @@ -179,7 +179,7 @@ __im_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) { WT_IM *im; - im = __wt_process.inmemory; + im = S2C(session)->inmemory; __wt_spin_lock(session, &im->lock); if (fh->off >= fh->buf.size) @@ -204,25 +204,6 @@ __im_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) return (0); } -/* - * __im_handle_open -- - * POSIX fopen/open. - */ -static int -__im_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *path, int dio_type, u_int flags) -{ - WT_UNUSED(session); - WT_UNUSED(path); - WT_UNUSED(dio_type); - WT_UNUSED(flags); - - fh->off = 0; - F_SET(fh, WT_FH_IN_MEMORY); - - return (0); -} - /* * __im_handle_printf -- * ANSI C vfprintf. @@ -237,14 +218,7 @@ __im_handle_printf( WT_IM *im; size_t len; - im = __wt_process.inmemory; - - if (fh == WT_STDERR || fh == WT_STDOUT) { - if (vfprintf(fh == WT_STDERR ? stderr : stdout, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, - "%s: vfprintf", fh == WT_STDERR ? "stderr" : "stdout"); - } + im = S2C(session)->inmemory; /* Build the string we're writing. */ WT_RET(__wt_scr_alloc(session, strlen(fmt) * 2 + 128, &tmp)); @@ -286,7 +260,7 @@ __im_handle_read( WT_IM *im; size_t off; - im = __wt_process.inmemory; + im = S2C(session)->inmemory; __wt_spin_lock(session, &im->lock); off = (size_t)offset; @@ -326,15 +300,10 @@ __im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) static int __im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { + WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(block); - /* Flush any stream's stdio buffers. */ - if (fh == WT_STDERR || fh == WT_STDOUT) { - if (fflush(fh == WT_STDERR ? stderr : stdout) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), - "%s: fflush", fh == WT_STDERR ? 
"stderr" : "stdout"); - } return (0); } @@ -348,7 +317,7 @@ __im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) WT_DECL_RET; WT_IM *im; - im = __wt_process.inmemory; + im = S2C(session)->inmemory; __wt_spin_lock(session, &im->lock); WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)len)); @@ -371,7 +340,7 @@ __im_handle_write(WT_SESSION_IMPL *session, WT_IM *im; size_t off; - im = __wt_process.inmemory; + im = S2C(session)->inmemory; __wt_spin_lock(session, &im->lock); off = (size_t)offset; @@ -391,6 +360,36 @@ err: __wt_spin_unlock(session, &im->lock); fh->name, len, off); } +/* + * __im_handle_open -- + * POSIX fopen/open. + */ +static int +__im_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *path, int dio_type, u_int flags) +{ + WT_UNUSED(session); + WT_UNUSED(path); + WT_UNUSED(dio_type); + WT_UNUSED(flags); + + fh->off = 0; + F_SET(fh, WT_FH_IN_MEMORY); + + fh->fh_advise = __im_handle_advise; + fh->fh_close = __im_handle_close; + fh->fh_getc = __im_handle_getc; + fh->fh_lock = __im_handle_lock; + fh->fh_printf = __im_handle_printf; + fh->fh_read = __im_handle_read; + fh->fh_size = __im_handle_size; + fh->fh_sync = __im_handle_sync; + fh->fh_truncate = __im_handle_truncate; + fh->fh_write = __im_handle_write; + + return (0); +} + /* * __wt_os_inmemory -- * Initialize an in-memory configuration. @@ -398,34 +397,26 @@ err: __wt_spin_unlock(session, &im->lock); int __wt_os_inmemory(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_IM *im; + conn = S2C(session); im = NULL; + /* Initialize the in-memory jump table. */ + conn->file_directory_sync = __im_directory_sync; + conn->file_exist = __im_file_exist; + conn->file_remove = __im_file_remove; + conn->file_rename = __im_file_rename; + conn->file_size = __im_file_size; + conn->handle_open = __im_handle_open; + /* Allocate an in-memory structure. 
*/ WT_RET(__wt_calloc_one(session, &im)); WT_ERR(__wt_spin_init(session, &im->lock, "in-memory I/O")); + conn->inmemory = im; - /* Initialize the in-memory jump table. */ - __wt_process.j_directory_sync = __im_directory_sync; - __wt_process.j_file_exist = __im_file_exist; - __wt_process.j_file_remove = __im_file_remove; - __wt_process.j_file_rename = __im_file_rename; - __wt_process.j_file_size = __im_file_size; - __wt_process.j_handle_advise = __im_handle_advise; - __wt_process.j_handle_close = __im_handle_close; - __wt_process.j_handle_getc = __im_handle_getc; - __wt_process.j_handle_lock = __im_handle_lock; - __wt_process.j_handle_open = __im_handle_open; - __wt_process.j_handle_printf = __im_handle_printf; - __wt_process.j_handle_read = __im_handle_read; - __wt_process.j_handle_size = __im_handle_size; - __wt_process.j_handle_sync = __im_handle_sync; - __wt_process.j_handle_truncate = __im_handle_truncate; - __wt_process.j_handle_write = __im_handle_write; - - __wt_process.inmemory = im; return (0); err: __wt_free(session, im); @@ -442,12 +433,11 @@ __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session) WT_DECL_RET; WT_IM *im; - if ((im = __wt_process.inmemory) == NULL) + if ((im = S2C(session)->inmemory) == NULL) return (0); - __wt_process.inmemory = NULL; + S2C(session)->inmemory = NULL; __wt_spin_destroy(session, &im->lock); - __wt_free(session, im); return (ret); diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 5cf0ddf0067..d9210dc12a4 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -137,11 +137,11 @@ __wt_open(WT_SESSION_IMPL *session, WT_STRING_MATCH(name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); /* Call the underlying open function. */ - WT_ERR(WT_JUMP(j_handle_open, session, fh, name, dio_type, flags)); + WT_ERR(conn->handle_open(session, fh, name, dio_type, flags)); open_called = true; /* Set the file's size. 
*/ - WT_ERR(WT_JUMP(j_handle_size, session, fh, &fh->size)); + WT_ERR(fh->fh_size(session, fh, &fh->size)); /* * Repeat the check for a match: if there's no match, link our newly @@ -149,7 +149,7 @@ __wt_open(WT_SESSION_IMPL *session, */ if (__wt_handle_search(session, name, true, true, fh, fhp)) { err: if (open_called) - WT_TRET(WT_JUMP(j_handle_close, session, fh)); + WT_TRET(fh->fh_close(session, fh)); if (fh != NULL) { __wt_free(session, fh->name); __wt_free(session, fh); @@ -177,10 +177,6 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) fh = *fhp; *fhp = NULL; - /* Catch attempts to close the standard streams. */ - if (fh == WT_STDERR || fh == WT_STDOUT) - return (EINVAL); - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: close", fh->name)); /* @@ -204,7 +200,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) __wt_spin_unlock(session, &conn->fh_lock); /* Discard underlying resources. */ - ret = WT_JUMP(j_handle_close, session, fh); + ret = fh->fh_close(session, fh); __wt_free(session, fh->name); __wt_free(session, fh); diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index b6b2ca4c582..89235592aab 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -336,6 +336,137 @@ __posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) WT_RET_MSG(session, ret, "%s: fcntl", fh->name); } +/* + * __posix_handle_printf -- + * ANSI C vfprintf. + */ +static int +__posix_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, ENOTSUP, + "%s: vfprintf: no stream configured", fh->name); + + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: vfprintf", fh->name); +} + +/* + * __posix_handle_read -- + * POSIX pread. 
+ */ +static int +__posix_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + size_t chunk; + ssize_t nr; + uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) + WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(), + "%s read error: failed to read %" WT_SIZET_FMT + " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __posix_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + + WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); + if (ret == 0) { + *sizep = sb.st_size; + return (0); + } + WT_RET_MSG(session, ret, "%s: fstat", fh->name); +} + +/* + * __posix_handle_sync -- + * POSIX fflush/fsync. + */ +static int +__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + if (fh->fp == NULL) + return (__posix_sync(session, fh->fd, fh->name, block)); + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: fflush", fh->name); +} + +/* + * __posix_handle_truncate -- + * POSIX ftruncate. + */ +static int +__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + + WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: ftruncate", fh->name); +} + +/* + * __posix_handle_write -- + * POSIX pwrite. 
+ */ +static int +__posix_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + size_t chunk; + ssize_t nw; + const uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break writes larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) + WT_RET_MSG(session, __wt_errno(), + "%s write error: failed to write %" WT_SIZET_FMT + " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + /* * __posix_handle_open -- * POSIX fopen/open. @@ -479,6 +610,18 @@ setupfh: __wt_free(session, path); fh->fd = fd; + + fh->fh_advise = __posix_handle_advise; + fh->fh_close = __posix_handle_close; + fh->fh_getc = __posix_handle_getc; + fh->fh_lock = __posix_handle_lock; + fh->fh_printf = __posix_handle_printf; + fh->fh_read = __posix_handle_read; + fh->fh_size = __posix_handle_size; + fh->fh_sync = __posix_handle_sync; + fh->fh_truncate = __posix_handle_truncate; + fh->fh_write = __posix_handle_write; + return (0); err: if (fd != -1) { @@ -492,152 +635,6 @@ err: if (fd != -1) { return (ret); } -/* - * __posix_handle_printf -- - * ANSI C vfprintf. - */ -static int -__posix_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - if (fh == WT_STDERR || fh == WT_STDOUT) { - if (vfprintf(fh == WT_STDERR ? stderr : stdout, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, - "%s: vfprintf", fh == WT_STDERR ? 
"stderr" : "stdout"); - } - - if (fh->fp == NULL) - WT_RET_MSG(session, ENOTSUP, - "%s: vfprintf: no stream configured", fh->name); - - if (vfprintf(fh->fp, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, "%s: vfprintf", fh->name); -} - -/* - * __posix_handle_read -- - * POSIX pread. - */ -static int -__posix_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - size_t chunk; - ssize_t nr; - uint8_t *addr; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) - WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(), - "%s read error: failed to read %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __posix_handle_size -- - * Get the size of a file in bytes, by file handle. - */ -static int -__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - - WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); - if (ret == 0) { - *sizep = sb.st_size; - return (0); - } - WT_RET_MSG(session, ret, "%s: fstat", fh->name); -} - -/* - * __posix_handle_sync -- - * POSIX fflush/fsync. - */ -static int -__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) -{ - /* Flush any stream's stdio buffers. */ - if (fh == WT_STDERR || fh == WT_STDOUT) { - if (fflush(fh == WT_STDERR ? stderr : stdout) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), - "%s: fflush", fh == WT_STDERR ? 
"stderr" : "stdout"); - } - - if (fh->fp == NULL) - return (__posix_sync(session, fh->fd, fh->name, block)); - - if (fflush(fh->fp) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: fflush", fh->name); -} - -/* - * __posix_handle_truncate -- - * POSIX ftruncate. - */ -static int -__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - - WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: ftruncate", fh->name); -} - -/* - * __posix_handle_write -- - * POSIX pwrite. - */ -static int -__posix_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - size_t chunk; - ssize_t nw; - const uint8_t *addr; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) - WT_RET_MSG(session, __wt_errno(), - "%s write error: failed to write %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - /* * __wt_os_posix -- * Initialize a POSIX configuration. @@ -645,25 +642,17 @@ __posix_handle_write(WT_SESSION_IMPL *session, int __wt_os_posix(WT_SESSION_IMPL *session) { - WT_UNUSED(session); + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); /* Initialize the POSIX jump table. 
*/ - __wt_process.j_directory_sync = __posix_directory_sync; - __wt_process.j_file_exist = __posix_file_exist; - __wt_process.j_file_remove = __posix_file_remove; - __wt_process.j_file_rename = __posix_file_rename; - __wt_process.j_file_size = __posix_file_size; - __wt_process.j_handle_advise = __posix_handle_advise; - __wt_process.j_handle_close = __posix_handle_close; - __wt_process.j_handle_getc = __posix_handle_getc; - __wt_process.j_handle_lock = __posix_handle_lock; - __wt_process.j_handle_open = __posix_handle_open; - __wt_process.j_handle_printf = __posix_handle_printf; - __wt_process.j_handle_read = __posix_handle_read; - __wt_process.j_handle_size = __posix_handle_size; - __wt_process.j_handle_sync = __posix_handle_sync; - __wt_process.j_handle_truncate = __posix_handle_truncate; - __wt_process.j_handle_write = __posix_handle_write; + conn->file_directory_sync = __posix_directory_sync; + conn->file_exist = __posix_file_exist; + conn->file_remove = __posix_file_remove; + conn->file_rename = __posix_file_rename; + conn->file_size = __posix_file_size; + conn->handle_open = __posix_handle_open; return (0); } diff --git a/src/os_posix/os_stdio.c b/src/os_posix/os_stdio.c new file mode 100644 index 00000000000..9e222ab8d98 --- /dev/null +++ b/src/os_posix/os_stdio.c @@ -0,0 +1,167 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __stdio_handle_advise -- + * POSIX fadvise. + */ +static int +__stdio_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + WT_RET_MSG(session, ENOTSUP, "%s: advise", fh->name); +} + +/* + * __stdio_handle_close -- + * ANSI C close/fclose. 
+ */ +static int +__stdio_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_RET_MSG(session, ENOTSUP, "%s: close", fh->name); +} + +/* + * __stdio_handle_getc -- + * ANSI C fgetc. + */ +static int +__stdio_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + WT_UNUSED(chp); + WT_RET_MSG(session, ENOTSUP, "%s: getc", fh->name); +} + +/* + * __stdio_handle_lock -- + * Lock/unlock a file. + */ +static int +__stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + WT_UNUSED(lock); + WT_RET_MSG(session, ENOTSUP, "%s: lock", fh->name); +} + +/* + * __stdio_handle_printf -- + * ANSI C vfprintf. + */ +static int +__stdio_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: vfprintf", fh->name); +} + +/* + * __stdio_handle_read -- + * POSIX pread. + */ +static int +__stdio_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(buf); + WT_RET_MSG(session, ENOTSUP, "%s: read", fh->name); +} + +/* + * __stdio_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__stdio_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + WT_UNUSED(sizep); + WT_RET_MSG(session, ENOTSUP, "%s: size", fh->name); +} + +/* + * __stdio_handle_sync -- + * POSIX fflush/fsync. + */ +static int +__stdio_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + WT_UNUSED(block); + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: fflush", fh->name); +} + +/* + * __stdio_handle_truncate -- + * POSIX ftruncate. + */ +static int +__stdio_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: truncate", fh->name); +} + +/* + * __stdio_handle_write -- + * POSIX pwrite. 
+ */ +static int +__stdio_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(buf); + WT_RET_MSG(session, ENOTSUP, "%s: write", fh->name); +} + +/* + * __stdio_func_init -- + * Initialize stdio functions. + */ +static void +__stdio_func_init(WT_FH *fh, const char *name, FILE *fp) +{ + fh->name = name; + fh->fp = fp; + + fh->fh_advise = __stdio_handle_advise; + fh->fh_close = __stdio_handle_close; + fh->fh_getc = __stdio_handle_getc; + fh->fh_lock = __stdio_handle_lock; + fh->fh_printf = __stdio_handle_printf; + fh->fh_read = __stdio_handle_read; + fh->fh_size = __stdio_handle_size; + fh->fh_sync = __stdio_handle_sync; + fh->fh_truncate = __stdio_handle_truncate; + fh->fh_write = __stdio_handle_write; +} + +/* + * __wt_os_stdio -- + * Initialize the stdio configuration. + */ +int +__wt_os_stdio(WT_SESSION_IMPL *session) +{ + __stdio_func_init(WT_STDERR(session), "stderr", stderr); + __stdio_func_init(WT_STDOUT(session), "stdout", stdout); + + return (0); +} diff --git a/src/support/err.c b/src/support/err.c index 040b1ef742f..f1f9c0c16e9 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -23,8 +23,8 @@ __handle_error_default(WT_EVENT_HANDLER *handler, session = (WT_SESSION_IMPL *)wt_session; - WT_RET(__wt_fprintf(session, WT_STDERR, "%s\n", errmsg)); - WT_RET(__wt_fsync(session, WT_STDERR, true)); + WT_RET(__wt_fprintf(session, WT_STDERR(session), "%s\n", errmsg)); + WT_RET(__wt_fsync(session, WT_STDERR(session), true)); return (0); } @@ -41,8 +41,8 @@ __handle_message_default(WT_EVENT_HANDLER *handler, WT_UNUSED(handler); session = (WT_SESSION_IMPL *)wt_session; - WT_RET(__wt_fprintf(session, WT_STDOUT, "%s\n", message)); - WT_RET(__wt_fsync(session, WT_STDOUT, true)); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "%s\n", message)); + WT_RET(__wt_fsync(session, WT_STDOUT(session), true)); return (0); } @@ -180,13 +180,13 @@ __wt_eventv(WT_SESSION_IMPL 
*session, bool msg_event, int error, * example, we can end up here without a session.) */ if (session == NULL) { - WT_RET(__wt_fprintf(session, WT_STDERR, + WT_RET(__wt_fprintf(session, WT_STDERR(session), "WiredTiger Error%s%s: ", error == 0 ? "" : ": ", error == 0 ? "" : __wt_strerror(session, error, NULL, 0))); - WT_RET(__wt_vfprintf(session, WT_STDERR, fmt, ap)); - WT_RET(__wt_fprintf(session, WT_STDERR, "\n")); - return (__wt_fsync(session, WT_STDERR, true)); + WT_RET(__wt_vfprintf(session, WT_STDERR(session), fmt, ap)); + WT_RET(__wt_fprintf(session, WT_STDERR(session), "\n")); + return (__wt_fsync(session, WT_STDERR(session), true)); } p = s; diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 347aa5f9dfb..da2670fb344 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -74,21 +74,22 @@ __txn_commit_printlog(WT_SESSION_IMPL *session, bool firstrecord; firstrecord = true; - WT_RET(__wt_fprintf(session, WT_STDOUT, " \"ops\": [\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"ops\": [\n")); /* The logging subsystem zero-pads records. 
*/ while (*pp < end && **pp) { if (!firstrecord) - WT_RET(__wt_fprintf(session, WT_STDOUT, ",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT, " {")); + WT_RET(__wt_fprintf( + session, WT_STDOUT(session), ",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " {")); firstrecord = false; WT_RET(__wt_txn_op_printlog(session, pp, end, flags)); - WT_RET(__wt_fprintf(session, WT_STDOUT, "\n }")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n }")); } - WT_RET(__wt_fprintf(session, WT_STDOUT, "\n ]\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n ]\n")); return (0); } @@ -484,16 +485,16 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_RET(__wt_logrec_read(session, &p, end, &rectype)); if (!firstrecord) - WT_RET(__wt_fprintf(session, WT_STDOUT, ",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), ",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " { \"lsn\" : [%" PRIu32 ",%" PRIu32 "],\n", lsnp->l.file, lsnp->l.offset)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"hdr_flags\" : \"%s\",\n", compressed ? "compressed" : "")); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"rec_len\" : %" PRIu32 ",\n", logrec->len)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"mem_len\" : %" PRIu32 ",\n", compressed ? 
logrec->mem_len : logrec->len)); @@ -501,18 +502,18 @@ __txn_printlog(WT_SESSION_IMPL *session, case WT_LOGREC_CHECKPOINT: WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"type\" : \"checkpoint\",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"ckpt_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n", lsnfile, lsnoffset)); break; case WT_LOGREC_COMMIT: WT_RET(__wt_vunpack_uint(&p, WT_PTRDIFF(end, p), &txnid)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"type\" : \"commit\",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"txnid\" : %" PRIu64 ",\n", txnid)); WT_RET(__txn_commit_printlog(session, &p, end, args->flags)); break; @@ -520,25 +521,25 @@ __txn_printlog(WT_SESSION_IMPL *session, case WT_LOGREC_FILE_SYNC: WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(Ii), &fileid, &start)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"type\" : \"file_sync\",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\" : %" PRIu32 ",\n", fileid)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"start\" : %" PRId32 "\n", start)); break; case WT_LOGREC_MESSAGE: WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(S), &msg)); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"type\" : \"message\",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"message\" : \"%s\"\n", msg)); break; } - WT_RET(__wt_fprintf(session, WT_STDOUT, " }")); + WT_RET(__wt_fprintf(session, 
WT_STDOUT(session), " }")); return (0); } @@ -556,10 +557,10 @@ __wt_txn_printlog(WT_SESSION *wt_session, uint32_t flags) session = (WT_SESSION_IMPL *)wt_session; args.flags = flags; - WT_RET(__wt_fprintf(session, WT_STDOUT, "[\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "[\n")); WT_RET(__wt_log_scan( session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, &args)); - WT_RET(__wt_fprintf(session, WT_STDOUT, "\n]\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n]\n")); return (0); } -- cgit v1.2.1 From e3ace5d86340e83ec0e612eec209b4e429d5c149 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 20 Mar 2016 16:35:40 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Fix for Linux build. --- src/include/misc.i | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/misc.i b/src/include/misc.i index b3d4f43a30c..2a0d0f2797b 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -96,7 +96,7 @@ __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); #ifdef __linux__ - return (fh->handle_sync(session, fh, true)); + return (fh->fh_sync(session, fh, true)); #else WT_UNUSED(fh); return (0); -- cgit v1.2.1 From 87fd222e2ea8b7ee1aa2c757626aad7257ba3e09 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 21 Mar 2016 07:10:57 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Make sure the cfg array is always NULL-terminated. 
--- src/conn/conn_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index fe6a782d08a..f3db3931cfc 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1118,7 +1118,8 @@ __conn_config_append(const char *cfg[], const char *config) { while (*cfg != NULL) ++cfg; - *cfg = config; + cfg[0] = config; + cfg[1] = NULL; } /* -- cgit v1.2.1 From 2c946de66f0ea053d8aa5e7432b82f7972e5a8b6 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 21 Mar 2016 09:57:40 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Move the read/write lock code from os/ to support/, it's not OS-specific. --- build_win/filelist.win | 2 +- dist/filelist | 2 +- src/include/extern.h | 16 +-- src/os_posix/os_mtx_rw.c | 367 ----------------------------------------------- src/support/mtx_rw.c | 367 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 377 insertions(+), 377 deletions(-) delete mode 100644 src/os_posix/os_mtx_rw.c create mode 100644 src/support/mtx_rw.c diff --git a/build_win/filelist.win b/build_win/filelist.win index df1efc23830..3d6b0a9193f 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -107,7 +107,6 @@ src/os_posix/os_getline.c src/os_posix/os_getopt.c src/os_posix/os_init.c src/os_posix/os_inmemory.c -src/os_posix/os_mtx_rw.c src/os_posix/os_posix.c src/os_posix/os_stdio.c src/os_posix/os_strtouq.c @@ -160,6 +159,7 @@ src/support/hash_fnv.c src/support/hazard.c src/support/hex.c src/support/huffman.c +src/support/mtx_rw.c src/support/pow.c src/support/rand.c src/support/scratch.c diff --git a/dist/filelist b/dist/filelist index e1595cbb5d8..4b2bf3439ee 100644 --- a/dist/filelist +++ b/dist/filelist @@ -114,7 +114,6 @@ src/os_posix/os_init.c src/os_posix/os_inmemory.c src/os_posix/os_map.c src/os_posix/os_mtx_cond.c -src/os_posix/os_mtx_rw.c src/os_posix/os_once.c src/os_posix/os_open.c src/os_posix/os_pagesize.c @@ -158,6 +157,7 @@ 
src/support/hash_fnv.c src/support/hazard.c src/support/hex.c src/support/huffman.c +src/support/mtx_rw.c src/support/pow.c src/support/power8/crc32.S src/support/power8/crc32_wrapper.c diff --git a/src/include/extern.h b/src/include/extern.h index 8c88b578591..91b853b2e4d 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -516,14 +516,6 @@ extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_s extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); -extern int __wt_rwlock_alloc( WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name); -extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp); extern int __wt_once(void (*init_routine)(void)); extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp); extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session); @@ -694,6 +686,14 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); extern int __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf); extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, 
WT_ITEM *to_buf); +extern int __wt_rwlock_alloc( WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name); +extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp); extern uint32_t __wt_nlpo2_round(uint32_t v); extern uint32_t __wt_nlpo2(uint32_t v); extern uint32_t __wt_log2_int(uint32_t n); diff --git a/src/os_posix/os_mtx_rw.c b/src/os_posix/os_mtx_rw.c deleted file mode 100644 index b6876cdfbdc..00000000000 --- a/src/os_posix/os_mtx_rw.c +++ /dev/null @@ -1,367 +0,0 @@ -/*- - * Public Domain 2014-2016 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Based on "Spinlocks and Read-Write Locks" by Dr. Steven Fuerst: - * http://locklessinc.com/articles/locks/ - * - * Dr. Fuerst further credits: - * There exists a form of the ticket lock that is designed for read-write - * locks. An example written in assembly was posted to the Linux kernel mailing - * list in 2002 by David Howells from RedHat. This was a highly optimized - * version of a read-write ticket lock developed at IBM in the early 90's by - * Joseph Seigh. Note that a similar (but not identical) algorithm was published - * by John Mellor-Crummey and Michael Scott in their landmark paper "Scalable - * Reader-Writer Synchronization for Shared-Memory Multiprocessors". - * - * The following is an explanation of this code. First, the underlying lock - * structure. - * - * struct { - * uint16_t writers; Now serving for writers - * uint16_t readers; Now serving for readers - * uint16_t users; Next available ticket number - * uint16_t __notused; Padding - * } - * - * First, imagine a store's 'take a number' ticket algorithm. A customer takes - * a unique ticket number and customers are served in ticket order. In the data - * structure, 'writers' is the next writer to be served, 'readers' is the next - * reader to be served, and 'users' is the next available ticket number. - * - * Next, consider exclusive (write) locks. The 'now serving' number for writers - * is 'writers'. 
To lock, 'take a number' and wait until that number is being - * served; more specifically, atomically copy and increment the current value of - * 'users', and then wait until 'writers' equals that copied number. - * - * Shared (read) locks are similar. Like writers, readers atomically get the - * next number available. However, instead of waiting for 'writers' to equal - * their number, they wait for 'readers' to equal their number. - * - * This has the effect of queuing lock requests in the order they arrive - * (incidentally avoiding starvation). - * - * Each lock/unlock pair requires incrementing both 'readers' and 'writers'. - * In the case of a reader, the 'readers' increment happens when the reader - * acquires the lock (to allow read-lock sharing), and the 'writers' increment - * happens when the reader releases the lock. In the case of a writer, both - * 'readers' and 'writers' are incremented when the writer releases the lock. - * - * For example, consider the following read (R) and write (W) lock requests: - * - * writers readers users - * 0 0 0 - * R: ticket 0, readers match OK 0 1 1 - * R: ticket 1, readers match OK 0 2 2 - * R: ticket 2, readers match OK 0 3 3 - * W: ticket 3, writers no match block 0 3 4 - * R: ticket 2, unlock 1 3 4 - * R: ticket 0, unlock 2 3 4 - * R: ticket 1, unlock 3 3 4 - * W: ticket 3, writers match OK 3 3 4 - * - * Note the writer blocks until 'writers' equals its ticket number and it does - * not matter if readers unlock in order or not. - * - * Readers or writers entering the system after the write lock is queued block, - * and the next ticket holder (reader or writer) will unblock when the writer - * unlocks. 
An example, continuing from the last line of the above example: - * - * writers readers users - * W: ticket 3, writers match OK 3 3 4 - * R: ticket 4, readers no match block 3 3 5 - * R: ticket 5, readers no match block 3 3 6 - * W: ticket 6, writers no match block 3 3 7 - * W: ticket 3, unlock 4 4 7 - * R: ticket 4, readers match OK 4 5 7 - * R: ticket 5, readers match OK 4 6 7 - * - * The 'users' field is a 2-byte value so the available ticket number wraps at - * 64K requests. If a thread's lock request is not granted until the 'users' - * field cycles and the same ticket is taken by another thread, we could grant - * a lock to two separate threads at the same time, and bad things happen: two - * writer threads or a reader thread and a writer thread would run in parallel, - * and lock waiters could be skipped if the unlocks race. This is unlikely, it - * only happens if a lock request is blocked by 64K other requests. The fix is - * to grow the lock structure fields, but the largest atomic instruction we have - * is 8 bytes, the structure has no room to grow. - */ - -#include "wt_internal.h" - -/* - * __wt_rwlock_alloc -- - * Allocate and initialize a read/write lock. - */ -int -__wt_rwlock_alloc( - WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name) -{ - WT_RWLOCK *rwlock; - - WT_RET(__wt_verbose(session, WT_VERB_MUTEX, "rwlock: alloc %s", name)); - - WT_RET(__wt_calloc_one(session, &rwlock)); - - rwlock->name = name; - - *rwlockp = rwlock; - return (0); -} - -/* - * __wt_try_readlock -- - * Try to get a shared lock, fail immediately if unavailable. 
- */ -int -__wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) -{ - wt_rwlock_t *l, new, old; - - WT_RET(__wt_verbose( - session, WT_VERB_MUTEX, "rwlock: try_readlock %s", rwlock->name)); - WT_STAT_FAST_CONN_INCR(session, rwlock_read); - - l = &rwlock->rwlock; - new = old = *l; - - /* - * This read lock can only be granted if the lock was last granted to - * a reader and there are no readers or writers blocked on the lock, - * that is, if this thread's ticket would be the next ticket granted. - * Do the cheap test to see if this can possibly succeed (and confirm - * the lock is in the correct state to grant this read lock). - */ - if (old.s.readers != old.s.users) - return (EBUSY); - - /* - * The replacement lock value is a result of allocating a new ticket and - * incrementing the reader value to match it. - */ - new.s.readers = new.s.users = old.s.users + 1; - return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY); -} - -/* - * __wt_readlock -- - * Get a shared lock. - */ -int -__wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) -{ - wt_rwlock_t *l; - uint16_t ticket; - int pause_cnt; - - WT_RET(__wt_verbose( - session, WT_VERB_MUTEX, "rwlock: readlock %s", rwlock->name)); - WT_STAT_FAST_CONN_INCR(session, rwlock_read); - - l = &rwlock->rwlock; - - /* - * Possibly wrap: if we have more than 64K lockers waiting, the ticket - * value will wrap and two lockers will simultaneously be granted the - * lock. - */ - ticket = __wt_atomic_fetch_add16(&l->s.users, 1); - for (pause_cnt = 0; ticket != l->s.readers;) { - /* - * We failed to get the lock; pause before retrying and if we've - * paused enough, sleep so we don't burn CPU to no purpose. This - * situation happens if there are more threads than cores in the - * system and we're thrashing on shared resources. - * - * Don't sleep long when waiting on a read lock, hopefully we're - * waiting on another read thread to increment the reader count. 
- */ - if (++pause_cnt < WT_THOUSAND) - WT_PAUSE(); - else - __wt_sleep(0, 10); - } - - /* - * We're the only writer of the readers field, so the update does not - * need to be atomic. - */ - ++l->s.readers; - - return (0); -} - -/* - * __wt_readunlock -- - * Release a shared lock. - */ -int -__wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) -{ - wt_rwlock_t *l; - - WT_RET(__wt_verbose( - session, WT_VERB_MUTEX, "rwlock: read unlock %s", rwlock->name)); - - l = &rwlock->rwlock; - - /* - * Increment the writers value (other readers are doing the same, make - * sure we don't race). - */ - (void)__wt_atomic_add16(&l->s.writers, 1); - - return (0); -} - -/* - * __wt_try_writelock -- - * Try to get an exclusive lock, fail immediately if unavailable. - */ -int -__wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) -{ - wt_rwlock_t *l, new, old; - - WT_RET(__wt_verbose( - session, WT_VERB_MUTEX, "rwlock: try_writelock %s", rwlock->name)); - WT_STAT_FAST_CONN_INCR(session, rwlock_write); - - l = &rwlock->rwlock; - old = new = *l; - - /* - * This write lock can only be granted if the lock was last granted to - * a writer and there are no readers or writers blocked on the lock, - * that is, if this thread's ticket would be the next ticket granted. - * Do the cheap test to see if this can possibly succeed (and confirm - * the lock is in the correct state to grant this write lock). - */ - if (old.s.writers != old.s.users) - return (EBUSY); - - /* The replacement lock value is a result of allocating a new ticket. */ - ++new.s.users; - return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY); -} - -/* - * __wt_writelock -- - * Wait to get an exclusive lock. 
- */ -int -__wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) -{ - wt_rwlock_t *l; - uint16_t ticket; - int pause_cnt; - - WT_RET(__wt_verbose( - session, WT_VERB_MUTEX, "rwlock: writelock %s", rwlock->name)); - WT_STAT_FAST_CONN_INCR(session, rwlock_write); - - l = &rwlock->rwlock; - - /* - * Possibly wrap: if we have more than 64K lockers waiting, the ticket - * value will wrap and two lockers will simultaneously be granted the - * lock. - */ - ticket = __wt_atomic_fetch_add16(&l->s.users, 1); - for (pause_cnt = 0; ticket != l->s.writers;) { - /* - * We failed to get the lock; pause before retrying and if we've - * paused enough, sleep so we don't burn CPU to no purpose. This - * situation happens if there are more threads than cores in the - * system and we're thrashing on shared resources. - */ - if (++pause_cnt < WT_THOUSAND) - WT_PAUSE(); - else - __wt_sleep(0, 10); - } - - return (0); -} - -/* - * __wt_writeunlock -- - * Release an exclusive lock. - */ -int -__wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) -{ - wt_rwlock_t *l, copy; - - WT_RET(__wt_verbose( - session, WT_VERB_MUTEX, "rwlock: writeunlock %s", rwlock->name)); - - l = &rwlock->rwlock; - - copy = *l; - - /* - * We're the only writer of the writers/readers fields, so the update - * does not need to be atomic; we have to update both values at the - * same time though, otherwise we'd potentially race with the thread - * next granted the lock. - * - * Use a memory barrier to ensure the compiler doesn't mess with these - * instructions and rework the code in a way that avoids the update as - * a unit. - */ - WT_BARRIER(); - - ++copy.s.writers; - ++copy.s.readers; - - l->i.wr = copy.i.wr; - - return (0); -} - -/* - * __wt_rwlock_destroy -- - * Destroy a read/write lock. - */ -int -__wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp) -{ - WT_RWLOCK *rwlock; - - rwlock = *rwlockp; /* Clear our caller's reference. 
*/ - if (rwlock == NULL) - return (0); - *rwlockp = NULL; - - WT_RET(__wt_verbose( - session, WT_VERB_MUTEX, "rwlock: destroy %s", rwlock->name)); - - __wt_free(session, rwlock); - return (0); -} diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c new file mode 100644 index 00000000000..b6876cdfbdc --- /dev/null +++ b/src/support/mtx_rw.c @@ -0,0 +1,367 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Based on "Spinlocks and Read-Write Locks" by Dr. Steven Fuerst: + * http://locklessinc.com/articles/locks/ + * + * Dr. Fuerst further credits: + * There exists a form of the ticket lock that is designed for read-write + * locks. 
An example written in assembly was posted to the Linux kernel mailing + * list in 2002 by David Howells from RedHat. This was a highly optimized + * version of a read-write ticket lock developed at IBM in the early 90's by + * Joseph Seigh. Note that a similar (but not identical) algorithm was published + * by John Mellor-Crummey and Michael Scott in their landmark paper "Scalable + * Reader-Writer Synchronization for Shared-Memory Multiprocessors". + * + * The following is an explanation of this code. First, the underlying lock + * structure. + * + * struct { + * uint16_t writers; Now serving for writers + * uint16_t readers; Now serving for readers + * uint16_t users; Next available ticket number + * uint16_t __notused; Padding + * } + * + * First, imagine a store's 'take a number' ticket algorithm. A customer takes + * a unique ticket number and customers are served in ticket order. In the data + * structure, 'writers' is the next writer to be served, 'readers' is the next + * reader to be served, and 'users' is the next available ticket number. + * + * Next, consider exclusive (write) locks. The 'now serving' number for writers + * is 'writers'. To lock, 'take a number' and wait until that number is being + * served; more specifically, atomically copy and increment the current value of + * 'users', and then wait until 'writers' equals that copied number. + * + * Shared (read) locks are similar. Like writers, readers atomically get the + * next number available. However, instead of waiting for 'writers' to equal + * their number, they wait for 'readers' to equal their number. + * + * This has the effect of queuing lock requests in the order they arrive + * (incidentally avoiding starvation). + * + * Each lock/unlock pair requires incrementing both 'readers' and 'writers'. 
+ * In the case of a reader, the 'readers' increment happens when the reader + * acquires the lock (to allow read-lock sharing), and the 'writers' increment + * happens when the reader releases the lock. In the case of a writer, both + * 'readers' and 'writers' are incremented when the writer releases the lock. + * + * For example, consider the following read (R) and write (W) lock requests: + * + * writers readers users + * 0 0 0 + * R: ticket 0, readers match OK 0 1 1 + * R: ticket 1, readers match OK 0 2 2 + * R: ticket 2, readers match OK 0 3 3 + * W: ticket 3, writers no match block 0 3 4 + * R: ticket 2, unlock 1 3 4 + * R: ticket 0, unlock 2 3 4 + * R: ticket 1, unlock 3 3 4 + * W: ticket 3, writers match OK 3 3 4 + * + * Note the writer blocks until 'writers' equals its ticket number and it does + * not matter if readers unlock in order or not. + * + * Readers or writers entering the system after the write lock is queued block, + * and the next ticket holder (reader or writer) will unblock when the writer + * unlocks. An example, continuing from the last line of the above example: + * + * writers readers users + * W: ticket 3, writers match OK 3 3 4 + * R: ticket 4, readers no match block 3 3 5 + * R: ticket 5, readers no match block 3 3 6 + * W: ticket 6, writers no match block 3 3 7 + * W: ticket 3, unlock 4 4 7 + * R: ticket 4, readers match OK 4 5 7 + * R: ticket 5, readers match OK 4 6 7 + * + * The 'users' field is a 2-byte value so the available ticket number wraps at + * 64K requests. If a thread's lock request is not granted until the 'users' + * field cycles and the same ticket is taken by another thread, we could grant + * a lock to two separate threads at the same time, and bad things happen: two + * writer threads or a reader thread and a writer thread would run in parallel, + * and lock waiters could be skipped if the unlocks race. This is unlikely, it + * only happens if a lock request is blocked by 64K other requests. 
The fix is + * to grow the lock structure fields, but the largest atomic instruction we have + * is 8 bytes, the structure has no room to grow. + */ + +#include "wt_internal.h" + +/* + * __wt_rwlock_alloc -- + * Allocate and initialize a read/write lock. + */ +int +__wt_rwlock_alloc( + WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name) +{ + WT_RWLOCK *rwlock; + + WT_RET(__wt_verbose(session, WT_VERB_MUTEX, "rwlock: alloc %s", name)); + + WT_RET(__wt_calloc_one(session, &rwlock)); + + rwlock->name = name; + + *rwlockp = rwlock; + return (0); +} + +/* + * __wt_try_readlock -- + * Try to get a shared lock, fail immediately if unavailable. + */ +int +__wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +{ + wt_rwlock_t *l, new, old; + + WT_RET(__wt_verbose( + session, WT_VERB_MUTEX, "rwlock: try_readlock %s", rwlock->name)); + WT_STAT_FAST_CONN_INCR(session, rwlock_read); + + l = &rwlock->rwlock; + new = old = *l; + + /* + * This read lock can only be granted if the lock was last granted to + * a reader and there are no readers or writers blocked on the lock, + * that is, if this thread's ticket would be the next ticket granted. + * Do the cheap test to see if this can possibly succeed (and confirm + * the lock is in the correct state to grant this read lock). + */ + if (old.s.readers != old.s.users) + return (EBUSY); + + /* + * The replacement lock value is a result of allocating a new ticket and + * incrementing the reader value to match it. + */ + new.s.readers = new.s.users = old.s.users + 1; + return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY); +} + +/* + * __wt_readlock -- + * Get a shared lock. 
+ */ +int +__wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +{ + wt_rwlock_t *l; + uint16_t ticket; + int pause_cnt; + + WT_RET(__wt_verbose( + session, WT_VERB_MUTEX, "rwlock: readlock %s", rwlock->name)); + WT_STAT_FAST_CONN_INCR(session, rwlock_read); + + l = &rwlock->rwlock; + + /* + * Possibly wrap: if we have more than 64K lockers waiting, the ticket + * value will wrap and two lockers will simultaneously be granted the + * lock. + */ + ticket = __wt_atomic_fetch_add16(&l->s.users, 1); + for (pause_cnt = 0; ticket != l->s.readers;) { + /* + * We failed to get the lock; pause before retrying and if we've + * paused enough, sleep so we don't burn CPU to no purpose. This + * situation happens if there are more threads than cores in the + * system and we're thrashing on shared resources. + * + * Don't sleep long when waiting on a read lock, hopefully we're + * waiting on another read thread to increment the reader count. + */ + if (++pause_cnt < WT_THOUSAND) + WT_PAUSE(); + else + __wt_sleep(0, 10); + } + + /* + * We're the only writer of the readers field, so the update does not + * need to be atomic. + */ + ++l->s.readers; + + return (0); +} + +/* + * __wt_readunlock -- + * Release a shared lock. + */ +int +__wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +{ + wt_rwlock_t *l; + + WT_RET(__wt_verbose( + session, WT_VERB_MUTEX, "rwlock: read unlock %s", rwlock->name)); + + l = &rwlock->rwlock; + + /* + * Increment the writers value (other readers are doing the same, make + * sure we don't race). + */ + (void)__wt_atomic_add16(&l->s.writers, 1); + + return (0); +} + +/* + * __wt_try_writelock -- + * Try to get an exclusive lock, fail immediately if unavailable. 
+ */ +int +__wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +{ + wt_rwlock_t *l, new, old; + + WT_RET(__wt_verbose( + session, WT_VERB_MUTEX, "rwlock: try_writelock %s", rwlock->name)); + WT_STAT_FAST_CONN_INCR(session, rwlock_write); + + l = &rwlock->rwlock; + old = new = *l; + + /* + * This write lock can only be granted if the lock was last granted to + * a writer and there are no readers or writers blocked on the lock, + * that is, if this thread's ticket would be the next ticket granted. + * Do the cheap test to see if this can possibly succeed (and confirm + * the lock is in the correct state to grant this write lock). + */ + if (old.s.writers != old.s.users) + return (EBUSY); + + /* The replacement lock value is a result of allocating a new ticket. */ + ++new.s.users; + return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY); +} + +/* + * __wt_writelock -- + * Wait to get an exclusive lock. + */ +int +__wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +{ + wt_rwlock_t *l; + uint16_t ticket; + int pause_cnt; + + WT_RET(__wt_verbose( + session, WT_VERB_MUTEX, "rwlock: writelock %s", rwlock->name)); + WT_STAT_FAST_CONN_INCR(session, rwlock_write); + + l = &rwlock->rwlock; + + /* + * Possibly wrap: if we have more than 64K lockers waiting, the ticket + * value will wrap and two lockers will simultaneously be granted the + * lock. + */ + ticket = __wt_atomic_fetch_add16(&l->s.users, 1); + for (pause_cnt = 0; ticket != l->s.writers;) { + /* + * We failed to get the lock; pause before retrying and if we've + * paused enough, sleep so we don't burn CPU to no purpose. This + * situation happens if there are more threads than cores in the + * system and we're thrashing on shared resources. + */ + if (++pause_cnt < WT_THOUSAND) + WT_PAUSE(); + else + __wt_sleep(0, 10); + } + + return (0); +} + +/* + * __wt_writeunlock -- + * Release an exclusive lock. 
+ */ +int +__wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +{ + wt_rwlock_t *l, copy; + + WT_RET(__wt_verbose( + session, WT_VERB_MUTEX, "rwlock: writeunlock %s", rwlock->name)); + + l = &rwlock->rwlock; + + copy = *l; + + /* + * We're the only writer of the writers/readers fields, so the update + * does not need to be atomic; we have to update both values at the + * same time though, otherwise we'd potentially race with the thread + * next granted the lock. + * + * Use a memory barrier to ensure the compiler doesn't mess with these + * instructions and rework the code in a way that avoids the update as + * a unit. + */ + WT_BARRIER(); + + ++copy.s.writers; + ++copy.s.readers; + + l->i.wr = copy.i.wr; + + return (0); +} + +/* + * __wt_rwlock_destroy -- + * Destroy a read/write lock. + */ +int +__wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp) +{ + WT_RWLOCK *rwlock; + + rwlock = *rwlockp; /* Clear our caller's reference. */ + if (rwlock == NULL) + return (0); + *rwlockp = NULL; + + WT_RET(__wt_verbose( + session, WT_VERB_MUTEX, "rwlock: destroy %s", rwlock->name)); + + __wt_free(session, rwlock); + return (0); +} -- cgit v1.2.1 From 811487a4b7576140e2a406786d62caa3c8b3e6a5 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 21 Mar 2016 15:30:58 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Make fallocate a WT_FH handle method, and directory-list a WT_CONNECTION_IMPL handle method. 
--- src/include/connection.h | 4 +++- src/include/extern.h | 6 +++--- src/include/misc.i | 26 ++++++++++++++++++++++++++ src/include/os.h | 1 + src/os_posix/os_dir.c | 11 ++++++----- src/os_posix/os_fallocate.c | 39 ++++++++++++++++++--------------------- src/os_posix/os_inmemory.c | 19 +++++++++++++++++++ src/os_posix/os_open.c | 3 --- src/os_posix/os_posix.c | 5 +++++ src/os_posix/os_stdio.c | 14 ++++++++++++++ 10 files changed, 95 insertions(+), 33 deletions(-) diff --git a/src/include/connection.h b/src/include/connection.h index c578ecf0fb8..7fcfe82f17f 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -431,7 +431,9 @@ struct __wt_connection_impl { * OS library/system call jump table, to support in-memory and readonly * configurations as well as special devices with other non-POSIX APIs. */ - int (*file_directory_sync)(WT_SESSION_IMPL *, const char *path); + int (*file_directory_list)(WT_SESSION_IMPL *, + const char *, const char *, uint32_t, char ***, u_int *); + int (*file_directory_sync)(WT_SESSION_IMPL *, const char *); int (*file_exist)(WT_SESSION_IMPL *, const char *, bool *); int (*file_remove)(WT_SESSION_IMPL *, const char *); int (*file_rename)(WT_SESSION_IMPL *, const char *, const char *); diff --git a/src/include/extern.h b/src/include/extern.h index 91b853b2e4d..f3ccefd5225 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -492,15 +492,15 @@ extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocate extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); -extern int __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); +extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const 
char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); extern int __wt_errno(void); extern int __wt_map_error_rdonly(int error); extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); -extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); +extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh); +extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); diff --git a/src/include/misc.i b/src/include/misc.i index 2a0d0f2797b..315878648c7 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -71,6 +71,18 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...) #endif } +/* + * __wt_dirlist -- + * Get a list of files from a directory. + */ +static inline int +__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, + const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) +{ + return (S2C(session)->file_directory_list( + session, dir, prefix, flags, dirlist, countp)); +} + /* * __wt_directory_sync -- * Flush a directory to ensure file creation is durable. @@ -113,6 +125,20 @@ __wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) return (S2C(session)->file_exist(session, name, existp)); } +/* + * __wt_fallocate -- + * Extend a file. 
+ */ +static inline int +__wt_fallocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + return (fh->fh_allocate(session, fh, offset, len)); +} + /* * __wt_posix_fadvise -- * POSIX fadvise. diff --git a/src/include/os.h b/src/include/os.h index 2e2f63da6ac..0e27ea96d23 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -110,6 +110,7 @@ struct __wt_fh { uint32_t flags; int (*fh_advise)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t, int); + int (*fh_allocate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t); int (*fh_close)(WT_SESSION_IMPL *, WT_FH *); int (*fh_getc)(WT_SESSION_IMPL *, WT_FH *, int *); int (*fh_lock)(WT_SESSION_IMPL *, WT_FH *, bool); diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c index d806c270860..a8a8e7d7aaa 100644 --- a/src/os_posix/os_dir.c +++ b/src/os_posix/os_dir.c @@ -11,13 +11,12 @@ #include /* - * __wt_dirlist -- - * Get a list of files from a directory, optionally filtered by - * a given prefix. + * __wt_posix_directory_list -- + * Get a list of files from a directory, POSIX version. */ int -__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, - uint32_t flags, char ***dirlist, u_int *countp) +__wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, + const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) { struct dirent *dp; DIR *dirp; @@ -57,6 +56,8 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, strcmp(dp->d_name, "..") == 0) continue; match = false; + + /* The list of files is optionally filtered by a prefix. 
*/ if (prefix != NULL && ((LF_ISSET(WT_DIRLIST_INCLUDE) && WT_PREFIX_MATCH(dp->d_name, prefix)) || diff --git a/src/os_posix/os_fallocate.c b/src/os_posix/os_fallocate.c index dbe5857e195..df4d76c3b3a 100644 --- a/src/os_posix/os_fallocate.c +++ b/src/os_posix/os_fallocate.c @@ -13,11 +13,11 @@ #include #endif /* - * __wt_fallocate_config -- - * Configure file-extension behavior for a file handle. + * __wt_posix_handle_allocate_configure -- + * Configure POSIX file-extension behavior for a file handle. */ void -__wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh) +__wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) { WT_UNUSED(session); @@ -40,11 +40,11 @@ __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh) } /* - * __wt_std_fallocate -- + * __posix_std_fallocate -- * Linux fallocate call. */ static int -__wt_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) { #if defined(HAVE_FALLOCATE) WT_DECL_RET; @@ -60,11 +60,11 @@ __wt_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) } /* - * __wt_sys_fallocate -- + * __posix_sys_fallocate -- * Linux fallocate call (system call version). */ static int -__wt_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) { #if defined(__linux__) && defined(SYS_fallocate) WT_DECL_RET; @@ -86,11 +86,11 @@ __wt_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) } /* - * __wt_posix_fallocate -- + * __posix_posix_fallocate -- * POSIX fallocate call. */ static int -__wt_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) { #if defined(HAVE_POSIX_FALLOCATE) WT_DECL_RET; @@ -106,18 +106,15 @@ __wt_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) } /* - * __wt_fallocate -- - * Extend a file. + * __wt_posix_handle_allocate -- + * POSIX fallocate. 
*/ int -__wt_fallocate( +__wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) { WT_DECL_RET; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - switch (fh->fallocate_available) { /* * Check for already configured handles and make the configured call. @@ -125,19 +122,19 @@ __wt_fallocate( case WT_FALLOCATE_POSIX: WT_RET(__wt_verbose( session, WT_VERB_FILEOPS, "%s: posix_fallocate", fh->name)); - if ((ret = __wt_posix_fallocate(fh, offset, len)) == 0) + if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: posix_fallocate", fh->name); case WT_FALLOCATE_STD: WT_RET(__wt_verbose( session, WT_VERB_FILEOPS, "%s: fallocate", fh->name)); - if ((ret = __wt_std_fallocate(fh, offset, len)) == 0) + if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: fallocate", fh->name); case WT_FALLOCATE_SYS: WT_RET(__wt_verbose( session, WT_VERB_FILEOPS, "%s: sys_fallocate", fh->name)); - if ((ret = __wt_sys_fallocate(fh, offset, len)) == 0) + if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: sys_fallocate", fh->name); @@ -154,17 +151,17 @@ __wt_fallocate( * fallocate (and the system call version of fallocate) first to * avoid locking on Linux if at all possible. 
*/ - if ((ret = __wt_std_fallocate(fh, offset, len)) == 0) { + if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) { fh->fallocate_available = WT_FALLOCATE_STD; fh->fallocate_requires_locking = false; return (0); } - if ((ret = __wt_sys_fallocate(fh, offset, len)) == 0) { + if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) { fh->fallocate_available = WT_FALLOCATE_SYS; fh->fallocate_requires_locking = false; return (0); } - if ((ret = __wt_posix_fallocate(fh, offset, len)) == 0) { + if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) { fh->fallocate_available = WT_FALLOCATE_POSIX; #if !defined(__linux__) fh->fallocate_requires_locking = false; diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index a1faaec2e45..825d33becfa 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -15,6 +15,24 @@ typedef struct { WT_SPINLOCK lock; } WT_IM; +/* + * __im_directory_list -- + * Get a list of files from a directory, in-memory version. + */ +static int +__im_directory_list(WT_SESSION_IMPL *session, const char *dir, + const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) +{ + WT_UNUSED(session); + WT_UNUSED(dir); + WT_UNUSED(prefix); + WT_UNUSED(flags); + WT_UNUSED(dirlist); + WT_UNUSED(countp); + + WT_RET_MSG(session, ENOTSUP, "directory-list"); +} + /* * __im_directory_sync -- * Flush a directory to ensure file creation is durable. @@ -405,6 +423,7 @@ __wt_os_inmemory(WT_SESSION_IMPL *session) im = NULL; /* Initialize the in-memory jump table. 
*/ + conn->file_directory_list = __im_directory_list; conn->file_directory_sync = __im_directory_sync; conn->file_exist = __im_file_exist; conn->file_remove = __im_file_remove; diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index d9210dc12a4..4cd41114b9f 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -117,9 +117,6 @@ __wt_open(WT_SESSION_IMPL *session, WT_ERR(__wt_calloc_one(session, &fh)); WT_ERR(__wt_strdup(session, name, &fh->name)); - /* Configure fallocate/posix_fallocate calls. */ - __wt_fallocate_config(session, fh); - /* * If this is a read-only connection, open all files read-only except * the lock file. diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index 89235592aab..a12dd437fe7 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -611,7 +611,11 @@ setupfh: __wt_free(session, path); fh->fd = fd; + /* Configure fallocate/posix_fallocate calls. */ + __wt_posix_handle_allocate_configure(session, fh); + fh->fh_advise = __posix_handle_advise; + fh->fh_allocate = __wt_posix_handle_allocate; fh->fh_close = __posix_handle_close; fh->fh_getc = __posix_handle_getc; fh->fh_lock = __posix_handle_lock; @@ -647,6 +651,7 @@ __wt_os_posix(WT_SESSION_IMPL *session) conn = S2C(session); /* Initialize the POSIX jump table. */ + conn->file_directory_list = __wt_posix_directory_list; conn->file_directory_sync = __posix_directory_sync; conn->file_exist = __posix_file_exist; conn->file_remove = __posix_file_remove; diff --git a/src/os_posix/os_stdio.c b/src/os_posix/os_stdio.c index 9e222ab8d98..98242773217 100644 --- a/src/os_posix/os_stdio.c +++ b/src/os_posix/os_stdio.c @@ -22,6 +22,19 @@ __stdio_handle_advise(WT_SESSION_IMPL *session, WT_RET_MSG(session, ENOTSUP, "%s: advise", fh->name); } +/* + * __stdio_handle_allocate -- + * POSIX fallocate. 
+ */ +static int +__stdio_handle_allocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: allocate", fh->name); +} + /* * __stdio_handle_close -- * ANSI C close/fclose. @@ -142,6 +155,7 @@ __stdio_func_init(WT_FH *fh, const char *name, FILE *fp) fh->fp = fp; fh->fh_advise = __stdio_handle_advise; + fh->fh_allocate = __stdio_handle_allocate; fh->fh_close = __stdio_handle_close; fh->fh_getc = __stdio_handle_getc; fh->fh_lock = __stdio_handle_lock; -- cgit v1.2.1 From c72e16f2f6a6ad71f5a2910296f04d71f51f025c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 21 Mar 2016 15:43:41 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Lint on the Linux build. --- src/include/misc.i | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/include/misc.i b/src/include/misc.i index 315878648c7..755a22df977 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -150,6 +150,11 @@ __wt_posix_fadvise(WT_SESSION_IMPL *session, #if defined(HAVE_POSIX_FADVISE) return (fh->fh_advise(session, fh, offset, len, advice)); #else + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); return (0); #endif } -- cgit v1.2.1 From 5e6f071cef98767846e6ec046fa498667dd7149e Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 22 Mar 2016 15:34:15 +1100 Subject: WT-2507 Add upgrading documentation for LSN change. --- src/docs/upgrading.dox | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index 8b3d61e4c19..5c5be214915 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -49,6 +49,13 @@ bulk-loaded, so they do not get named checkpoints that complete during the bulk load. +
Remove WT_LSN structure from public API
+
+The WiredTiger public API used to define a structure that could encapsulate +log sequence numbers. That structure is no longer exposed publicly. +
+ +

@section version_270 Upgrading to Version 2.7.0 -- cgit v1.2.1 From d16256ae1e007ff285cc2bdc3b3a9c1f5f11a0ef Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 23 Mar 2016 16:13:00 +1100 Subject: Cut WiredTiger WT-2.8.0 release --- NEWS | 561 +++++++++++++++++++++++-------------- README | 6 +- RELEASE_INFO | 4 +- build_posix/aclocal/version-set.m4 | 8 +- build_posix/aclocal/version.m4 | 2 +- dist/package/wiredtiger.spec | 2 +- dist/s_string.ok | 1 + src/docs/top/main.dox | 6 +- 8 files changed, 373 insertions(+), 217 deletions(-) diff --git a/NEWS b/NEWS index 546d08b2418..4373d69c087 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,159 @@ +WiredTiger release 2.8.0, 2016-03-22 +------------------------------------ + +The WiredTiger 2.8.0 release contains new features, minor API changes and bug +fixes. + +New features and API changes; refer to the API documentation for full details: + +WT-60 Port WiredTiger to run on big endian platforms +WT-2287 Add a new WT_SESSION.rebalance API +WT-2333 Add a no_wait configuration setting to WT_SESSION.drop so it doesn't block +WT-2349 Add a readonly configuration setting to wiredtiger_open +WT-2363 Remove built in support for bzip2 compression +WT-2404 Add streaming pack/unpack methods to the extension API + +Significant changes and bug fixes: + +WT-1801 Add a directory sync after rollback of a WT_SESSION::rename operation +WT-2130 Improve on-disk page utilization with random workloads +WT-2275 Fix a database corruption after application crash +WT-2264 High update workloads can cause checkpoints to never complete +WT-2290 Improve how effective WT_SESSION.compact is +WT-2361 Fix a bug in column-store where verify identifies out of order data +WT-2367 Fix a bug in WT_CURSOR.next that could cause out-of-order key returns +WT-2374 Fix a bug where a database was corrupted after a hard crash +WT-2451 Allow the WiredTiger metadata to be evicted +WT-2490 Fix a bug in column-store where search_near() returns the wrong key + +Issues fixed in 
MongoDB: + +SERVER-21619 sys-perf: WT crash during core_workloads_WT execution +SERVER-21833 Enhance WT_SESSION::compact to more reliably release space +SERVER-21887 Enhance $sample to be faster on newly created collection +SERVER-22676 Fix a bug where WiredTiger fails to open databases created by 3.0.0 or 3.0.1 +SERVER-22773 CRC32 implementation on PPC was broken +SERVER-22831 Low query rate with heavy cache pressure and an idle collection + +Other note worthy changes since the previous release: + +WT-1517 Fix error handling around schema format edge cases +WT-2060 Simplify aggregation of statistics +WT-2073 Metadata cleanups +WT-2099 Seeing memory underflow messages +WT-2107 Add example code including an event handler +WT-2113 Truncate test occasionally fails with unexpected EBUSY +WT-2123 Don't clear allocated memory if not required +WT-2173 Fix some cases where tiny caches could get stuck full +WT-2177 Add an optional per-thread seed to random number generator +WT-2198 Bulk load and column store appends +WT-2215 WT_LSN needs to support atomic reads and updates +WT-2216 Simplify row-store search loop slightly +WT-2231 Pinned page cursor searches could check parent keys +WT-2235 Add a unicode option to WiredTiger printlog utility +WT-2242 WiredTiger treats dead trees the same as other trees in eviction +WT-2246 Improve performance for column-store append searches +WT-2247 Variable-length column-store in-memory page splits +WT-2258 Stop WiredTiger pre-loading pages when direct-IO is configured +WT-2259 Fix error handling when getting exclusive access to a btree +WT-2262 Fix random cursor next so it is not skewed by tree shape +WT-2265 WiredTiger related change in PPC specific code block in gcc.h +WT-2272 Fix a bug in the sweep server that triggered an assertion +WT-2276 Add a tool to decode checkpoint addr +WT-2277 Remove WT check against big-endian systems +WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined +WT-2280 Add CRC32 Optimized code for 
PPC +WT-2282 Error in wt_txn_update_oldest verbose message test +WT-2283 Retry in txn_update_oldest results in a hang +WT-2285 Enhance configure to set BUFFER_ALIGNMENT_DEFAULT to 4kb on Linux +WT-2289 Fix a bug in btree search when doing a fast key check +WT-2291 Random cursor walk inefficient in skip list only trees +WT-2295 WT_SESSION.create does a full-scan of the main table +WT-2296 Improve log algorithm for sync/flush settings +WT-2297 Fix off-by-one error in Huffman config file parsing +WT-2299 Clean up layering violation between btree and block manager code +WT-2307 Fix a bug where internal page splits can corrupt cursor iteration +WT-2308 Add support for custom extractor for ref_cursors in join cursor +WT-2311 Add support for UltraSparc platform +WT-2312 Fix a bug where re-creating a deleted column-store page can corrupt the in-memory tree +WT-2313 Fix a bug in the sweep server +WT-2314 Update page-swap error handling so that it is consistent +WT-2316 Fix a bug in WT_CURSOR.prev where it could return keys out-of-order +WT-2318 Enhance condition wait implementation to use less CPU on idle databases +WT-2321 Fix a race between eviction and worker threads on the eviction queue +WT-2322 Fix a bug in read-uncommitted join cursors where using Bloom filters is unsafe +WT-2328 Update schema drop to use the block manager interface for file removal +WT-2331 Checking of search result for reference cursors before join +WT-2332 Fix a bug in logging write-no-sync mode +WT-2335 Fix a bug where parsing an invalid configuration string could segfault +WT-2338 Disable using pre-allocated log files when a backup cursor is open +WT-2339 Fix a bug in rebalance that caused database verification failure +WT-2340 Add logging guarantee assertions +WT-2345 Avoid creating tiny pages on disk when evicting small pages from cache +WT-2346 Enhance checkpoint implementation so the schema lock is not held during I/O +WT-2347 Fix some schema format edge cases in Java API +WT-2352 Allow 
build and test without requiring lz4 +WT-2355 Fix minor scratch buffer usage in logging +WT-2356 log scan advances to next log file on partially written record +WT-2368 Fix a bug where row-store can pass invalid keys to collator functions +WT-2369 Use C compiler to detect headers instead of C++ compiler +WT-2371 Fix a bug where parent split cannot access the page after page-index swap +WT-2372 WiredTiger windows builder fails with C4005 against the "inline" macro +WT-2375 Add tests for custom collators +WT-2378 Fix a hang in LSM when doing forced drop with the no wait option +WT-2381 Fix the dump utility so it does not discard the table configuration +WT-2382 Fix a bug in join cursors with custom collator for 'u' format +WT-2384 Fix a bug in join cursors where lt, le conditions for ordering could be wrong +WT-2387 Fix cursor random unit test on Windows +WT-2390 Fix the OS X build +WT-2391 Enhance eviction so that it is less likely to evict pages from indexes +WT-2394 Fix a bug in compact that meant we didn't always reclaim available space +WT-2395 Fix a recovery failure with an LSM tree +WT-2396 Fix a deadlock between table drop and checkpoint +WT-2397 Fix a bug in cursor traversal where doing a reverse walk could skip records. 
+WT-2399 Add test case that verifies cursor traversal +WT-2409 Fix a minor performance regression in LSM +WT-2410 Stop casting function pointers to different types +WT-2411 Fix a hang in LSM related to dropping tables +WT-2414 Avoid extractor calls for ordering cursor in join cursor +WT-2417 Windows Jenkins task is failing +WT-2418 Fix a bug in WT_SESSION.rebalance where it could return EBUSY +WT-2420 Fix a bug in LSM where recovery from a backup could fail +WT-2423 Fix a bug in session reference counting on error handling +WT-2425 Fix a performance regression in wtperf evict-btree read workload +WT-2426 Fix a deadlock caused by recent changes to checkpoint handle locking +WT-2428 Make statistics logging compatible with MongoDB +WT-2429 Add a statistic that tracks aggressive mode in eviction +WT-2430 Add statistics for join cursor +WT-2432 Fix a performance regression on LSM and read only workloads +WT-2433 Allow read-only databases to log statistics +WT-2434 Fix a race between force-drop and sweep +WT-2436 Fix a bug in join cursors with lt, le conditions and "strategy=bloom" +WT-2438 Extend WiredTiger stat declarations to help external tools +WT-2440 Fix a bug in the PPC checksum implementation +WT-2443 Add statistics for all indexes used in join cursor +WT-2447 Enhance join cursor implementation to avoid reading main table where possible +WT-2448 Add no_scale flag to relevant statistics +WT-2449 Enhance configure to check for a 64-bit build +WT-2454 Fix checkpoint_sync=false behavior to prevent flushes/sync to disk +WT-2456 Fix PPC CRC32 Code +WT-2457 Fix a bug where dropping an LSM table can fail with EBUSY when no user ops are active +WT-2459 Allow Configure scripts to provide the --tag option for libtool when compiling on PPC +WT-2460 Fix a bug where checkpoint could fail with WT_ROLLBACK +WT-2471 Update WiredTiger printf formats to be platform aware +WT-2476 Fix a race where btree->evict_lock is being accessed after being destroyed +WT-2481 Fix a recently 
introduced performance regression in LSM +WT-2483 Make read only testing more robust +WT-2485 Fix a test/format failure with floating point exception +WT-2492 Fix a bug in Windows where we used the different memory allocators accidentally +WT-2495 Missing memory initialization leads to crash on Windows +WT-2496 Fix a bug revealed by test/format unable to read root page +WT-2497 Enhance test/format to save a copy of backup +WT-2498 Fix a bug in LSM tree drop where it could hang when a user cursor is open +WT-2499 Fix a bug in LSM shutdown where a race condition causes a segfault +WT-2501 Fix a bug where dropping a just opened LSM tree isn't thread safe +WT-2502 Fix a memory leak in locking handles for checkpoint + WiredTiger release 2.7.0, 2015-12-08 ------------------------------------ @@ -6,222 +162,221 @@ fixes. New features and API changes; refer to the API documentation for full details: -* 959376c WT-147: Create indexes on non-empty tables. -* 4368d39 WT-1315: Add an implementation of cursor joins via a new WT_SESSION::join API. -* 944ccd1 WT-1350: Add a new configuration option to ::wiredtiger_open and - WT_CONNECTION::reconfigure called "eviction_dirty_trigger" that causes eviction to start evicting - dirty pages from cache once the given threshold has been reached. -* ab5a8fb WT-1728: Add a WT_SESSION::reset method to release resources held by a session. -* 263c5b7 WT-1930: Allow setting "file_manager=(close_idle_time=0)" to ::wiredtiger_open and - WT_CONNECTION::reconfigure to disable closing idle handles. -* 6310c3f WT-1959: Change verify to distinguish between warnings and errors. Add a new strict mode - to verify that causes warnings to be reported as errors. Use strict mode to match earlier - behavior. See the upgrading documentation for more information. -* e0d6229 WT-1980: Add a new "metadata:create" URI to WT_SESSION::open_cursor for metadata cursors - that return strings useful for passing to WT_SESSION::create. 
-* 292712e WT-2065: Add a new configuration option to ::wiredtiger_open and - WT_CONNECTION::reconfigure called "shared_cache=(quota)" that limits the amount of shared cache a - participant can be assigned. -* 4d0ebf4 WT-2104: Add a method to flush log files via a new WT_SESSION::log_flush API. Made - WT_SESSION::commit_transaction configuration options match WT_SESSION::log_flush. Change the - default WT_SESSION::transaction_sync timeout to 20 minutes rather than infinity. -* 21b8330 WT-2151: Enhance logging configuration to allow reconfiguration and add a new - "log=(zero_fill)" configuration option that causes WiredTiger to zero-fill log files on creation. -* 368b307 WT-2200: Add a new configuration option to ::wiredtiger_open called "write_through" that - causes WiredTiger to specify the FILE_FLAG_WRITE_THROUGH on Windows when writing files (default - false, including when "direct_io" is configured). -* 08c0fcd WT-2217: After a successful call to WT_CURSOR::insert, the key and value will be - cleared from the cursor. See the upgrading documentation for more information. -* d4fc69a SERVER-17078: Add a "statistics=(size)" mode to statistics cursors, which allows for - retrieving file size only. -* b83b901 SERVER-18356: Changed the handling of the "config_base" option to ::wiredtiger_open. See - upgrading documentation for more information. +WT-147 Create indexes on non-empty tables. +WT-1315 Add an implementation of cursor joins via a new WT_SESSION::join API. +WT-1350 Add a new configuration option to ::wiredtiger_open and + WT_CONNECTION::reconfigure called "eviction_dirty_trigger" that causes eviction to start + evicting dirty pages from cache once the given threshold has been reached. +WT-1728 Add a WT_SESSION::reset method to release resources held by a session. +WT-1930 Allow setting "file_manager=(close_idle_time=0)" to ::wiredtiger_open and + WT_CONNECTION::reconfigure to disable closing idle handles. 
+WT-1959 Change verify to distinguish between warnings and errors. Add a new strict mode + to verify that causes warnings to be reported as errors. Use strict mode to match earlier + behavior. See the upgrading documentation for more information. +WT-1980 Add a new "metadata:create" URI to WT_SESSION::open_cursor for metadata cursors + that return strings useful for passing to WT_SESSION::create. +WT-2065 Add a new configuration option to ::wiredtiger_open and + WT_CONNECTION::reconfigure called "shared_cache=(quota)" that limits the amount of shared + cache a participant can be assigned. +WT-2104 Add a method to flush log files via a new WT_SESSION::log_flush API. Made + WT_SESSION::commit_transaction configuration options match WT_SESSION::log_flush. Change + the default WT_SESSION::transaction_sync timeout to 20 minutes rather than infinity. +WT-2151 Enhance logging configuration to allow reconfiguration and add a new "log=(zero_fill)" + configuration option that causes WiredTiger to zero-fill log files on creation. +WT-2200 Add a new configuration option to ::wiredtiger_open called "write_through" that + causes WiredTiger to specify the FILE_FLAG_WRITE_THROUGH on Windows when writing files + (default false, including when "direct_io" is configured). +WT-2217 After a successful call to WT_CURSOR::insert, the key and value will be + cleared from the cursor. See the upgrading documentation for more information. +SERVER-17078 Add a "statistics=(size)" mode to statistics cursors, which allows for + retrieving file size only. +SERVER-18356 Changed the handling of the "config_base" option to ::wiredtiger_open. See + upgrading documentation for more information. The following statistics were removed: -* f1ed3b9 WT-1481: connection dhandles swept. -* f1ed3b9 WT-1481: connection candidate referenced. -* 4ba4518 WT-1481: failed to find a slot large enough for record. -* 28563af WT-1989: log buffer size increases. 
-* f81c70d WT-1989: slots selected for switching that were unavailable. -* df4f69c WT-2094: log records written directly. -* df4f69c WT-2094: record size exceeded maximum. -* d68e078 WT-2182: pages split during eviction. +WT-1481 connection dhandles swept. +WT-1481 connection candidate referenced. +WT-1481 failed to find a slot large enough for record. +WT-1989 log buffer size increases. +WT-1989 slots selected for switching that were unavailable. +WT-2094 log records written directly. +WT-2094 record size exceeded maximum. +WT-2182 pages split during eviction. Lookaside table: -* 6a5a461 WT-1967: Allow eviction of updates required by old readers. -* 87592ec WT-2074: Fix a race between lookaside table reconciliation and checkpoints. -* 0390b29 WT-2149: Fix the order of creation of the lookaside table. -* 7518a69 WT-2190: Fix transaction visibility test that is applied to the lookaside table. -* 2cf57a6 SERVER-21585: Don't use the lookaside file until the cache is stuck full. +WT-1967 Allow eviction of updates required by old readers. +WT-2074 Fix a race between lookaside table reconciliation and checkpoints. +WT-2149 Fix the order of creation of the lookaside table. +WT-2190 Fix transaction visibility test that is applied to the lookaside table. +SERVER-21585 Don't use the lookaside file until the cache is stuck full. Issues fixed in MongoDB: -* d57dc26 SERVER-18829: Have pages start in the middle of the LRU queue for eviction. -* b847ccc SERVER-18838: During drops, don't remove files until the metadata is durable. -* 8f7da9a SERVER-18875: Clean up deleted pages. -* d04083d SERVER-18899: Add unit test to simulate fsyncLock. -* 3ec45a7 SERVER-19340: Avoid type aliasing in the random number generator. -* 907c0ca SERVER-19445: Have the oldest transaction update the oldest tracked ID. -* fb8739f SERVER-19522: Try to evict internal pages with no useful child pages. -* 4545a8b SERVER-19573: Change row-store inserts to avoid page locking. 
-* b52d2d3 SERVER-19751: Retry pthread_create on EAGAIN or EINTR. -* 46b4ad5 SERVER-19954: Don't scan tracked handles during checkpoints. -* 65abd20 SERVER-19989: Add a write barrier before data handles are added to shared lists. -* 3e46e79 SERVER-19990: Don't assert on eviction of live updates from dead trees. -* 38dad39 SERVER-20008: Don't reset eviction walks when hitting a busy page. -* 3b72361 SERVER-20159: Make all readers wait while the cache is full. -* 8be547b SERVER-20193: Fix obsolete transaction check. -* ad56c6a SERVER-20303: Tune in-memory splits when inserting large objects. -* 7505a02 SERVER-20385: Make WT_CURSOR::next(random) more random. -* 35d46c3 SERVER-21027: Reverse split if there are many deleted pages. -* a6da10e SERVER-21553: Enable fast-path truncate after splits. -* 890ee34 SERVER-21619: Don't do internal page splits after a tree is marked DEAD. -* 0e93d60 SERVER-21691: Avoid insert stalls. +SERVER-18829 Have pages start in the middle of the LRU queue for eviction. +SERVER-18838 During drops, don't remove files until the metadata is durable. +SERVER-18875 Clean up deleted pages. +SERVER-18899 Add unit test to simulate fsyncLock. +SERVER-19340 Avoid type aliasing in the random number generator. +SERVER-19445 Have the oldest transaction update the oldest tracked ID. +SERVER-19522 Try to evict internal pages with no useful child pages. +SERVER-19573 Change row-store inserts to avoid page locking. +SERVER-19751 Retry pthread_create on EAGAIN or EINTR. +SERVER-19954 Don't scan tracked handles during checkpoints. +SERVER-19989 Add a write barrier before data handles are added to shared lists. +SERVER-19990 Don't assert on eviction of live updates from dead trees. +SERVER-20008 Don't reset eviction walks when hitting a busy page. +SERVER-20159 Make all readers wait while the cache is full. +SERVER-20193 Fix obsolete transaction check. +SERVER-20303 Tune in-memory splits when inserting large objects. 
+SERVER-20385 Make WT_CURSOR::next(random) more random. +SERVER-21027 Reverse split if there are many deleted pages. +SERVER-21553 Enable fast-path truncate after splits. +SERVER-21619 Don't do internal page splits after a tree is marked DEAD. +SERVER-21691 Avoid insert stalls. Other note worthy changes since the previous release: -* bc2aa57 WT-1744: Throttle worker threads based on eviction targets. -* 55a989e WT-1845: Allow read only transactions to commit after failure. -* df625dc WT-1869: Avoid doing in memory splits while checkpointing a tree. -* ddac54f WT-1942: Add atomic implementations for PPC64 architecture. -* 3866fa6 WT-1962: Make the hot_backup_lock a read/write lock. -* 58f9e99 WT-1963: Fix backup cursor Java API. -* 4e0fe59 WT-1964: Fix a bug in the Java API when closing handles from a different thread. -* 60e2150 WT-1966: Change how the shared cache assigns priority to participants. -* 76d2e73 WT-1975: Ensure previous log files are complete for forced sync. -* e43b22a WT-1977: Improve performance of getting snapshots with many sessions. -* 5eaf63e WT-1978: Better checking and tests for index cursor comparison. -* 1602a4b WT-1981: Fix a signed 32-bit integer unpacking bug. -* cd1704d WT-1982: Fix a bug where cached overflow items were freed too early. -* 57a9f38 WT-1985: Integer packing and other fixes for Python and Java. -* 9897eb2 WT-1986: Fix a race renaming temporary log files. -* b10bff9 WT-1989: Improve scalability of log writes. -* f8dc12b WT-1996: Fix a bug where we would free the fist update during a page rewrite on error. -* 144a383 WT-1998: Fixes for indexes with some rarely used key/value formats. -* 8af8b8a WT-2002: Fix a bug in verify where it would panic when encountering a corrupted file. -* e1d8bc7 WT-2007: Statically allocate log slot buffers to a maximum size. -* 911158c WT-2008: Fix a bug in recovery where a file create went missing. -* 3e2e7e6 WT-2009: Apply tracked metadata operations post-commit. 
-* 1255cb2 WT-2012: Fix a bug updating the oldest ID. -* ef9d56f WT-2013: Add gcc asm definitions for ARM64. -* c8633e6 WT-2014: Fix a bug in checkpoints where files could be flushed in the wrong order. -* 9b09e69 WT-2015: Fix a bug in error handling during block open. -* 4938b8d WT-2017: Once an eviction server thread is started keep it running. -* 298f86c WT-2019: Fix a logic bug tracking the maximum transaction ID in clean trees. -* 7d6075c WT-2020: Clarify checksum error failure messages. -* 7b302d3 WT-2021: Fix a bug moving the oldest ID forward (introduced by WT-1967). -* 9df72d7 WT-2022: Fix a bug not releasing a handle when opening a non-existent index cursor. -* 81ffc2d WT-2023: Improve locking primitives: simplify read-write lock operations. -* 6b84722 WT-2029: Improve scalability of statistics. -* f97cfe9 WT-2031: Log slot revamp. -* bee11c3 WT-2032: Improve next_random cursors to work with small trees. -* cf53696 WT-2034: Improve shared cache balancing algorithm. -* aee1c94 WT-2035: For index cursors, keep track of which column groups need to be positioned. -* 36310d4 WT-2036: Make handle sweeps more robust. -* c948fbb WT-2037: Only write a checkpoint to the log on close if it wasn't. -* e25e615 WT-2038: Avoid long scans holding the handle list lock. -* 75a4655 WT-2039: Add error check and unit test for log records over 4 GB. -* 5ab26af WT-2042: Only try to evict tombstones that are visible to all readers. -* ce223ac WT-2045: Don't let the eviction server do slow reconciliation, it can stall eviction. -* 6665618 WT-2046: Add a statistic for search restarts. -* 98b4a28 WT-2047: Fix a bug in the random generator code to handle an uninitialized state. -* 258e2e1 WT-2050: Show size with memory allocation errors. -* 2e1471c WT-2053: Fix a bug in disk verify messages. -* e316e61 WT-2056: Reorder btree cursor close so stats are maintained correctly. -* 70f9100 WT-2057: Remove the verbose configuration when writing the base configuration file. 
-* 41b6fb8 WT-2058: Fix an alignment bug in the mutex and log-slot code. -* d72012b WT-2059: Include non-aggregated stats in cursor results. -* 3e0c7bf WT-2062: Try harder to make progress on in-memory splits. -* 66757f7 WT-2064: Don't spin indefinitely waiting for the handle list lock in eviction. -* 8f42f02 WT-2066: Update the oldest transaction ID from eviction. -* e167592 WT-2068: Protect discarding handles with the handle list lock. -* fd72a09 WT-2075: Fix a hang in logging with parallel workload. -* 11c0fa0 WT-2078: Fix a bug in error handling with statistics cursors. -* 9734d85 WT-2081: Make verify progress reporting less verbose. -* 6008b41 WT-2085: Run some of the log_server threads operations more frequently. -* 39a69ec WT-2086: Add a statistic to track when eviction finds a page that can be split. -* 334e103 WT-2089: Relax restrictions on multiblock eviction and in-memory splits. -* f13b788 WT-2090: Fix a bug in the Windows OS layer that swallowed error returns. -* 83b8db7 WT-2092: Free log condition variables after all threads are joined. -* d9391c0 WT-2093: Use the C99 bool type to clarify when functions return true/false. -* f883d27 WT-2094: Eliminate direct write and record unbuffered log records. -* 9008260 WT-2097: Reintroduce immediate waits when forced eviction is necessary. -* ff1da28 WT-2100: Rename evict to evict_queue so it's easier to search for. -* 41db2ee WT-2101: Don't update the logging ckpt_lsn on clean shutdown. -* e1d6886 WT-2102: Fix a hang in log slot join when forcing log writes. -* 0e96683 WT-2105: Fix a bug where we could reference an invalid memory address if a file is - corrupted on disk. -* 6a565bc WT-2108: Rework in-memory page rewrite support (WT_PM_REC_REWRITE). -* dcb0ddb WT-2114: Make application eviction fairer. -* 10c2f15 WT-2115: Don't skip truncated pages that are part of a checkpoint. -* cd6ce97 WT-2116: Add diagnostic checks for stuck cache and dump the state. 
-* 51cf672 WT-2119: Don't evict clean multiblock pages with overflow items during checkpoints. -* 346ad40 WT-2126: Clean up if there is an error during splits. -* 6831485 WT-2127: Deepen the tree more regularly to avoid wide internal pages. -* a0b5d2b WT-2128: When decoding huffman encoding during salvage it's possible to have fewer bits - than the symbol length during decoding, if the value has been corrupted. -* 79f74e5 WT-2131: Switch to using a lock to control page splits to avoid starvation. -* 02a3d9f WT-2132: Make debug dump function more robust to errors. -* 8c223e4 WT-2134: Flush all buffered log records in log_flush. -* d1b5e7f WT-2135: Fix log_only setting for backup cursor. Fix initialization. -* aab8101 WT-2137: Check the sync_lsn is in the correct file before moving it forward. -* 323af84 WT-2139: Fix a transaction visibility bug in read-uncommitted transactions. -* 751c628 WT-2146: Improve performance when searching for short keys. -* 62998ce WT-2148: Fix a compiler warning in encoding functions. -* 6c16fdd WT-2153: Fix bug. Now we always need to start the log_server thread. -* 6a5fca3 WT-2154: Make btree dump safer. -* 0d74bc6 WT-2155: Remove last use of F_CAS_ATOMIC and the associated macro. -* cc42bda WT-2156: Allow eviction workers to restart. -* bf1d359 WT-2157: Fix a bug where a failed page split could lead to incomplete checkpoints. -* ce9d265 WT-2159: Don't check the config twice in one path. -* 544f27d WT-2162: Add null pointer check, needed after an index is dropped. -* 0d85ebe WT-2164: Prevent another LSM chunk checkpoint while the first is still in progress. -* a81aae8 WT-2165: Stop using FALLOC_FL_KEEP_SIZE flag when pre-allocating files. -* 2865a76 WT-2167: Switch recovery to using an internal session. -* 5d4c952 WT-2170: Protect the turtle file with a lock. -* 497b744 WT-2174: Avoid the table list lock when creating a size only statistics cursor. -* fdfa804 WT-2178: In-memory storage engine support. 
-* b9bd01f WT-2179: Added decorator to mark txn13 as part of the --long test suite. -* be544dd WT-2180: Remove cursor.{search,search-near,remove} key size validation. -* be412b5 WT-2182: When internal pages grow large enough, split them into their parents. -* c27e78e WT-2184: Fix log scan bug when final record has many trailing zeros. -* 9584be3 WT-2185: Don't do reverse splits when closing a file. -* f6b12d3 WT-2187: Add flag for flushing a slot. -* a4545bf WT-2189: Update flag set and clear macros to be less error prone. -* 30ab327 WT-2191: In-memory disk image no longer the same as saved updates. -* 4ba5698 WT-2192: Fix the logic around checking whether internal page is evictable. -* 2f0b3e2 WT-2193: Handle read-committed metadata checkpoints during snapshot transactions. -* 9b1febc WT-2194: Java close callbacks should handle cursors that Java code did not open. -* 438f455 WT-2195: Fix a hang after giving up on a reverse split. -* ff27fe9 WT-2196: Fix error handling in size only statistics. -* 0a1ee34 WT-2199: Fix transaction sync inconsistency. -* 2ff1fd6 WT-2203: Release an allocated page on error. -* 3b3cf2a WT-2204: Don't take a local copy of page->modify until we know the page is dirty. -* 179d4d0 WT-2206: Change cache operations from flags to an enumeration. -* 82514ca WT-2207: Track whenever a session has a handle exclusive. -* 78bd4ac WT-2210: Raw compression fails if row-store recovery precedes column-store recovery. -* c360d53 WT-2212: Add a "use_environment" config to ::wiredtiger_open. -* a72ddb7 WT-2218: Add truncate stats. -* ce8c091 WT-2219: Enhancements to in-memory testing. -* e2f1130 WT-2220: Update time comparison macros. -* 59857f9 WT-2222: Add statistics for named snapshots. -* fb9cebe WT-2224: Track which deleted refs are discarded by a split. -* cace179 WT-2228: Avoid unnecessary raw-compression calls. -* 0a52a80 WT-2237: Have threads publish unique transaction IDs so that updates always become - visible immediately on commit. 
-* 6c7338f WT-2241: Use a lock to protect transaction ID allocation. -* 39dfd21 WT-2243: Don't keep transaction IDs pinned for reading from checkpoints. -* 4c49948 WT-2244: Trigger in-memory splits sooner. -* 9f2e4f3 WT-2248: WT_SESSION::close is updating WT_CONNECTION_IMPL.default_session. -* 264ec21 WT-2249: Keep eviction stuck until cache usage is under 100%. -* dca1411 WT-2250: Minor fix. Use SET instead of increment for stat. -* e731ef8 WT-2251: Free addresses when we discard deleted page references. -* 4fc3e39 WT-2253: Evict pages left behind by in-memory splits. -* 2df5658 WT-2257: Fixes when given multiple thread workload configurations. -* 4c49043 WT-2260: Avoid adding internal pages to the eviction queue +WT-1744 Throttle worker threads based on eviction targets. +WT-1845 Allow read only transactions to commit after failure. +WT-1869 Avoid doing in memory splits while checkpointing a tree. +WT-1942 Add atomic implementations for PPC64 architecture. +WT-1962 Make the hot_backup_lock a read/write lock. +WT-1963 Fix backup cursor Java API. +WT-1964 Fix a bug in the Java API when closing handles from a different thread. +WT-1966 Change how the shared cache assigns priority to participants. +WT-1975 Ensure previous log files are complete for forced sync. +WT-1977 Improve performance of getting snapshots with many sessions. +WT-1978 Better checking and tests for index cursor comparison. +WT-1981 Fix a signed 32-bit integer unpacking bug. +WT-1982 Fix a bug where cached overflow items were freed too early. +WT-1985 Integer packing and other fixes for Python and Java. +WT-1986 Fix a race renaming temporary log files. +WT-1989 Improve scalability of log writes. +WT-1996 Fix a bug where we would free the first update during a page rewrite on error. +WT-1998 Fixes for indexes with some rarely used key/value formats. +WT-2002 Fix a bug in verify where it would panic when encountering a corrupted file. +WT-2007 Statically allocate log slot buffers to a maximum size. 
+WT-2008 Fix a bug in recovery where a file create went missing. +WT-2009 Apply tracked metadata operations post-commit. +WT-2012 Fix a bug updating the oldest ID. +WT-2013 Add gcc asm definitions for ARM64. +WT-2014 Fix a bug in checkpoints where files could be flushed in the wrong order. +WT-2015 Fix a bug in error handling during block open. +WT-2017 Once an eviction server thread is started keep it running. +WT-2019 Fix a logic bug tracking the maximum transaction ID in clean trees. +WT-2020 Clarify checksum error failure messages. +WT-2021 Fix a bug moving the oldest ID forward (introduced by WT-1967). +WT-2022 Fix a bug not releasing a handle when opening a non-existent index cursor. +WT-2023 Improve locking primitives: simplify read-write lock operations. +WT-2029 Improve scalability of statistics. +WT-2031 Log slot revamp. +WT-2032 Improve next_random cursors to work with small trees. +WT-2034 Improve shared cache balancing algorithm. +WT-2035 For index cursors, keep track of which column groups need to be positioned. +WT-2036 Make handle sweeps more robust. +WT-2037 Only write a checkpoint to the log on close if it wasn't. +WT-2038 Avoid long scans holding the handle list lock. +WT-2039 Add error check and unit test for log records over 4 GB. +WT-2042 Only try to evict tombstones that are visible to all readers. +WT-2045 Don't let the eviction server do slow reconciliation, it can stall eviction. +WT-2046 Add a statistic for search restarts. +WT-2047 Fix a bug in the random generator code to handle an uninitialized state. +WT-2050 Show size with memory allocation errors. +WT-2053 Fix a bug in disk verify messages. +WT-2056 Reorder btree cursor close so stats are maintained correctly. +WT-2057 Remove the verbose configuration when writing the base configuration file. +WT-2058 Fix an alignment bug in the mutex and log-slot code. +WT-2059 Include non-aggregated stats in cursor results. +WT-2062 Try harder to make progress on in-memory splits. 
+WT-2064 Don't spin indefinitely waiting for the handle list lock in eviction. +WT-2066 Update the oldest transaction ID from eviction. +WT-2068 Protect discarding handles with the handle list lock. +WT-2075 Fix a hang in logging with parallel workload. +WT-2078 Fix a bug in error handling with statistics cursors. +WT-2081 Make verify progress reporting less verbose. +WT-2085 Run some of the log_server threads operations more frequently. +WT-2086 Add a statistic to track when eviction finds a page that can be split. +WT-2089 Relax restrictions on multiblock eviction and in-memory splits. +WT-2090 Fix a bug in the Windows OS layer that swallowed error returns. +WT-2092 Free log condition variables after all threads are joined. +WT-2093 Use the C99 bool type to clarify when functions return true/false. +WT-2094 Eliminate direct write and record unbuffered log records. +WT-2097 Reintroduce immediate waits when forced eviction is necessary. +WT-2100 Rename evict to evict_queue so it's easier to search for. +WT-2101 Don't update the logging ckpt_lsn on clean shutdown. +WT-2102 Fix a hang in log slot join when forcing log writes. +WT-2105 Fix a bug where we could reference an invalid memory address if a file is corrupted on disk. +WT-2108 Rework in-memory page rewrite support (WT_PM_REC_REWRITE). +WT-2114 Make application eviction fairer. +WT-2115 Don't skip truncated pages that are part of a checkpoint. +WT-2116 Add diagnostic checks for stuck cache and dump the state. +WT-2119 Don't evict clean multiblock pages with overflow items during checkpoints. +WT-2126 Clean up if there is an error during splits. +WT-2127 Deepen the tree more regularly to avoid wide internal pages. +WT-2128 When decoding huffman encoding during salvage it's possible to have fewer bits than the + symbol length during decoding, if the value has been corrupted. +WT-2131 Switch to using a lock to control page splits to avoid starvation. +WT-2132 Make debug dump function more robust to errors. 
+WT-2134 Flush all buffered log records in log_flush. +WT-2135 Fix log_only setting for backup cursor. Fix initialization. +WT-2137 Check the sync_lsn is in the correct file before moving it forward. +WT-2139 Fix a transaction visibility bug in read-uncommitted transactions. +WT-2146 Improve performance when searching for short keys. +WT-2148 Fix a compiler warning in encoding functions. +WT-2153 Fix bug. Now we always need to start the log_server thread. +WT-2154 Make btree dump safer. +WT-2155 Remove last use of F_CAS_ATOMIC and the associated macro. +WT-2156 Allow eviction workers to restart. +WT-2157 Fix a bug where a failed page split could lead to incomplete checkpoints. +WT-2159 Don't check the config twice in one path. +WT-2162 Add null pointer check, needed after an index is dropped. +WT-2164 Prevent another LSM chunk checkpoint while the first is still in progress. +WT-2165 Stop using FALLOC_FL_KEEP_SIZE flag when pre-allocating files. +WT-2167 Switch recovery to using an internal session. +WT-2170 Protect the turtle file with a lock. +WT-2174 Avoid the table list lock when creating a size only statistics cursor. +WT-2178 In-memory storage engine support. +WT-2179 Added decorator to mark txn13 as part of the --long test suite. +WT-2180 Remove cursor.{search,search-near,remove} key size validation. +WT-2182 When internal pages grow large enough, split them into their parents. +WT-2184 Fix log scan bug when final record has many trailing zeros. +WT-2185 Don't do reverse splits when closing a file. +WT-2187 Add flag for flushing a slot. +WT-2189 Update flag set and clear macros to be less error prone. +WT-2191 In-memory disk image no longer the same as saved updates. +WT-2192 Fix the logic around checking whether internal page is evictable. +WT-2193 Handle read-committed metadata checkpoints during snapshot transactions. +WT-2194 Java close callbacks should handle cursors that Java code did not open. +WT-2195 Fix a hang after giving up on a reverse split. 
+WT-2196 Fix error handling in size only statistics. +WT-2199 Fix transaction sync inconsistency. +WT-2203 Release an allocated page on error. +WT-2204 Don't take a local copy of page->modify until we know the page is dirty. +WT-2206 Change cache operations from flags to an enumeration. +WT-2207 Track whenever a session has a handle exclusive. +WT-2210 Raw compression fails if row-store recovery precedes column-store recovery. +WT-2212 Add a "use_environment" config to ::wiredtiger_open. +WT-2218 Add truncate stats. +WT-2219 Enhancements to in-memory testing. +WT-2220 Update time comparison macros. +WT-2222 Add statistics for named snapshots. +WT-2224 Track which deleted refs are discarded by a split. +WT-2228 Avoid unnecessary raw-compression calls. +WT-2237 Have threads publish unique transaction IDs so that updates always become visible + immediately on commit. +WT-2241 Use a lock to protect transaction ID allocation. +WT-2243 Don't keep transaction IDs pinned for reading from checkpoints. +WT-2244 Trigger in-memory splits sooner. +WT-2248 WT_SESSION::close is updating WT_CONNECTION_IMPL.default_session. +WT-2249 Keep eviction stuck until cache usage is under 100%. +WT-2250 Minor fix. Use SET instead of increment for stat. +WT-2251 Free addresses when we discard deleted page references. +WT-2253 Evict pages left behind by in-memory splits. +WT-2257 Fixes when given multiple thread workload configurations. +WT-2260 Avoid adding internal pages to the eviction queue WiredTiger release 2.6.1, 2015-05-13 ------------------------------------ diff --git a/README b/README index 07dde47feaf..bdb9003c22d 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ -WiredTiger 2.7.1: (December 8, 2015) +WiredTiger 2.8.0: (March 23, 2016) -This is version 2.7.1 of WiredTiger. +This is version 2.8.0 of WiredTiger. 
WiredTiger release packages and documentation can be found at: @@ -8,7 +8,7 @@ WiredTiger release packages and documentation can be found at: The documentation for this specific release can be found at: - http://source.wiredtiger.com/2.7.1/index.html + http://source.wiredtiger.com/2.8.0/index.html The WiredTiger source code can be found at: diff --git a/RELEASE_INFO b/RELEASE_INFO index d2c7995910e..7d6f8a982b4 100644 --- a/RELEASE_INFO +++ b/RELEASE_INFO @@ -1,6 +1,6 @@ WIREDTIGER_VERSION_MAJOR=2 -WIREDTIGER_VERSION_MINOR=7 -WIREDTIGER_VERSION_PATCH=1 +WIREDTIGER_VERSION_MINOR=8 +WIREDTIGER_VERSION_PATCH=0 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"` diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4 index 6c7c6eed9cf..746c73499df 100644 --- a/build_posix/aclocal/version-set.m4 +++ b/build_posix/aclocal/version-set.m4 @@ -1,14 +1,14 @@ dnl build by dist/s_version VERSION_MAJOR=2 -VERSION_MINOR=7 -VERSION_PATCH=1 -VERSION_STRING='"WiredTiger 2.7.1: (December 8, 2015)"' +VERSION_MINOR=8 +VERSION_PATCH=0 +VERSION_STRING='"WiredTiger 2.8.0: (March 23, 2016)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) AC_SUBST(VERSION_PATCH) AC_SUBST(VERSION_STRING) -VERSION_NOPATCH=2.7 +VERSION_NOPATCH=2.8 AC_SUBST(VERSION_NOPATCH) diff --git a/build_posix/aclocal/version.m4 b/build_posix/aclocal/version.m4 index 3b690982f9d..9c12f14ad48 100644 --- a/build_posix/aclocal/version.m4 +++ b/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. 
Maintained by dist/s_version -2.7.1 +2.8.0 diff --git a/dist/package/wiredtiger.spec b/dist/package/wiredtiger.spec index 5bbb26885c0..a9eff97794c 100644 --- a/dist/package/wiredtiger.spec +++ b/dist/package/wiredtiger.spec @@ -1,5 +1,5 @@ Name: wiredtiger -Version: 2.7.1 +Version: 2.8.0 Release: 1%{?dist} Summary: WiredTiger data storage engine diff --git a/dist/s_string.ok b/dist/s_string.ok index 6762521ca76..0ec07b45e8d 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -1035,6 +1035,7 @@ undef unencrypted unesc unescaped +unicode uninstantiated unistd unlinked diff --git a/src/docs/top/main.dox b/src/docs/top/main.dox index 7e670541e7d..08f7536d540 100644 --- a/src/docs/top/main.dox +++ b/src/docs/top/main.dox @@ -6,12 +6,12 @@ WiredTiger is an high performance, scalable, production quality, NoSQL, @section releases Releases +@row{WiredTiger 2.8.0 (current), + [Release package], + [Documentation]} @row{WiredTiger 2.7.0 (current), [Release package], [Documentation]} -@row{WiredTiger 2.6.1 (previous), - [Release package], - [Documentation]} @row{Development branch, [Source code], [Documentation]} -- cgit v1.2.1 From fc99d7e5307ef6a57b3a53edde16d9b9490fe982 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 23 Mar 2016 16:20:47 +1100 Subject: Fixup changelog so it renders in our documentation. --- NEWS | 712 +++++++++++++++++++++++++++++++++---------------------------------- 1 file changed, 356 insertions(+), 356 deletions(-) diff --git a/NEWS b/NEWS index 4373d69c087..d8c638e748f 100644 --- a/NEWS +++ b/NEWS @@ -6,153 +6,153 @@ fixes. 
New features and API changes; refer to the API documentation for full details: -WT-60 Port WiredTiger to run on big endian platforms -WT-2287 Add a new WT_SESSION.rebalance API -WT-2333 Add a no_wait configuration setting to WT_SESSION.drop so it doesn't block -WT-2349 Add a readonly configuration setting to wiredtiger_open -WT-2363 Remove built in support for bzip2 compression -WT-2404 Add streaming pack/unpack methods to the extension API +* WT-60 Port WiredTiger to run on big endian platforms +* WT-2287 Add a new WT_SESSION.rebalance API +* WT-2333 Add a no_wait configuration setting to WT_SESSION.drop so it doesn't block +* WT-2349 Add a readonly configuration setting to wiredtiger_open +* WT-2363 Remove built in support for bzip2 compression +* WT-2404 Add streaming pack/unpack methods to the extension API Significant changes and bug fixes: -WT-1801 Add a directory sync after rollback of a WT_SESSION::rename operation -WT-2130 Improve on-disk page utilization with random workloads -WT-2275 Fix a database corruption after application crash -WT-2264 High update workloads can cause checkpoints to never complete -WT-2290 Improve how effective WT_SESSION.compact is -WT-2361 Fix a bug in column-store where verify identifies out of order data -WT-2367 Fix a bug in WT_CURSOR.next that could cause out-of-order key returns -WT-2374 Fix a bug where a database was corrupted after a hard crash -WT-2451 Allow the WiredTiger metadata to be evicted -WT-2490 Fix a bug in column-store where search_near() returns the wrong key +* WT-1801 Add a directory sync after rollback of a WT_SESSION::rename operation +* WT-2130 Improve on-disk page utilization with random workloads +* WT-2275 Fix a database corruption after application crash +* WT-2264 High update workloads can cause checkpoints to never complete +* WT-2290 Improve how effective WT_SESSION.compact is +* WT-2361 Fix a bug in column-store where verify identifies out of order data +* WT-2367 Fix a bug in WT_CURSOR.next that 
could cause out-of-order key returns +* WT-2374 Fix a bug where a database was corrupted after a hard crash +* WT-2451 Allow the WiredTiger metadata to be evicted +* WT-2490 Fix a bug in column-store where search_near() returns the wrong key Issues fixed in MongoDB: -SERVER-21619 sys-perf: WT crash during core_workloads_WT execution -SERVER-21833 Enhance WT_SESSION::compact to more reliably release space -SERVER-21887 Enhance $sample to be faster on newly created collection -SERVER-22676 Fix a bug in WiredTiger fails to open databases created by 3.0.0 or 3.0.1 -SERVER-22773 CRC32 implementation on PPC was broken -SERVER-22831 Low query rate with heavy cache pressure and an idle collection +* SERVER-21619 sys-perf: WT crash during core_workloads_WT execution +* SERVER-21833 Enhance WT_SESSION::compact to more reliably release space +* SERVER-21887 Enhance $sample to be faster on newly created collection +* SERVER-22676 Fix a bug where WiredTiger fails to open databases created by 3.0.0 or 3.0.1 +* SERVER-22773 CRC32 implementation on PPC was broken +* SERVER-22831 Low query rate with heavy cache pressure and an idle collection Other note worthy changes since the previous release: -WT-1517 Fix error handling around schema format edge cases -WT-2060 Simplify aggregation of statistics -WT-2073 Metadata cleanups -WT-2099 Seeing memory underflow messages -WT-2107 Add example code including an event handler -WT-2113 Truncate test occasionally fails with unexpected EBUSY -WT-2123 Don't clear allocated memory if not required -WT-2173 Fix some cases where tiny caches could get stuck full -WT-2177 Add an optional per-thread seed to random number generator -WT-2198 Bulk load and column store appends -WT-2215 WT_LSN needs to support atomic reads and updates -WT-2216 Simplify row-store search loop slightly -WT-2231 Pinned page cursor searches could check parent keys -WT-2235 Add a unicode option to WiredTiger printlog utility -WT-2242 WiredTiger treats dead trees the same as other 
trees in eviction -WT-2246 Improve performance for column-store append searches -WT-2247 Variable-length column-store in-memory page splits -WT-2258 Stop WiredTiger pre-loading pages when direct-IO is configured -WT-2259 Fix error handling when getting exclusive access to a btree -WT-2262 Fix random cursor next so it is not skewed by tree shape -WT-2265 WiredTiger related change in PPC specific code block in gcc.h -WT-2272 Fix a bug in the sweep server that triggered an assertion -WT-2276 Add a tool to decode checkpoint addr -WT-2277 Remove WT check against big-endian systems -WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined -WT-2280 Add CRC32 Optimized code for PPC -WT-2282 Error in wt_txn_update_oldest verbose message test -WT-2283 Retry in txn_update_oldest results in a hang -WT-2285 Enhance configure to set BUFFER_ALIGNMENT_DEFAULT to 4kb on Linux -WT-2289 Fix a bug in btree search when doing a fast key check -WT-2291 Random cursor walk inefficient in skip list only trees -WT-2295 WT_SESSION.create does a full-scan of the main table -WT-2296 Improve log algorithm for sync/flush settings -WT-2297 Fix off-by-one error in Huffman config file parsing -WT-2299 Clean up layering violation between btree and block manager code -WT-2307 Fix a bug where internal page splits can corrupt cursor iteration -WT-2308 Add support for custom extractor for ref_cursors in join cursor -WT-2311 Add support for UltraSparc platform -WT-2312 Fix a bug where re-creating a deleted column-store page can corrupt the in-memory tree -WT-2313 Fix a bug in the sweep server -WT-2314 Update page-swap error handling so that it is consistent -WT-2316 Fix a bug in WT_CURSOR.prev where it could return keys out-of-order -WT-2318 Enhance condition wait implementation to use less CPU on idle databases -WT-2321 Fix a race between eviction and worker threads on the eviction queue -WT-2322 Fix a bug in read-uncommitted join cursors where using Bloom filters is unsafe -WT-2328 Update 
schema drop does to use the block manager interface for file removal -WT-2331 Checking of search result for reference cursors before join -WT-2332 Fix a bug in logging write-no-sync mode -WT-2335 Fix a bug where parsing an invalid configuration string could segfault -WT-2338 Disable using pre-allocated log files when a backup cursor is open -WT-2339 Fix a bug in rebalance that caused database verification failure -WT-2340 Add logging guarantee assertions -WT-2345 Avoid creating tiny pages on disk when evicting small pages from cache -WT-2346 Enhance checkpoint implementation so the schema lock is not held during I/O -WT-2347 Fix some schema format edge cases in Java API -WT-2352 Allow build and test without requiring lz4 -WT-2355 Fix minor scratch buffer usage in logging -WT-2356 log scan advances to next log file on partially written record -WT-2368 Fix a bug where row-store can pass invalid keys to collator functions -WT-2369 Use C compiler to detect headers instead of C++ compiler -WT-2371 Fix a bug where parent split cannot access the page after page-index swap -WT-2372 WiredTiger windows builder fails with C4005 against the "inline" macro -WT-2375 Add tests for custom collators -WT-2378 Fix a hang in LSM when doing forced drop with the no wait option -WT-2381 Fix the dump utility so it does not discard the table configuration -WT-2382 Fix a bug in join cursors with custom collator for 'u' format -WT-2384 Fix a bug in join cursors where lt, le conditions for ordering could be wrong -WT-2387 Fix cursor random unit test on Windows -WT-2390 Fix the OS X build -WT-2391 Enhance eviction so that it is less likely to evict pages from indexes -WT-2394 Fix a bug in compact that meant we didn't always reclaim available space -WT-2395 Fix a recovery failure with an LSM tree -WT-2396 Fix a deadlock between table drop and checkpoint -WT-2397 Fix a bug in cursor traversal where doing a reverse walk could skip records. 
-WT-2399 Add test case that verifies cursor traversal -WT-2409 Fix a minor performance regression in LSM -WT-2410 Stop casting function pointers to different types -WT-2411 Fix a hang in LSM related to dropping tables -WT-2414 Avoid extractor calls for ordering cursor in join cursor -WT-2417 Windows Jenkins task is failing -WT-2418 Fix a bug in WT_SESSION.rebalance where it could return EBUSY -WT-2420 Fix a bug in LSM where recovery from a backup could fail -WT-2423 Fix a bug in session reference counting on error handling -WT-2425 Fix a performance regression in wtperf evict-btree read workload -WT-2426 Fix a deadlock caused by recent changes to checkpoint handle locking -WT-2428 Make statistics logging compatible with MongoDB -WT-2429 Add a statistic that tracks aggressive mode in eviction -WT-2430 Add statistics for join cursor -WT-2432 Fix a performance regression on LSM and read only workloads -WT-2433 Allow read-only databases to log statistics -WT-2434 Fix a race between force-drop and sweep -WT-2436 Fix a bug in join cursors with lt, le conditions and "strategy=bloom" -WT-2438 Extend WiredTiger stat declarations to help external tools -WT-2440 Fix a bug in the PPC checksum implementation -WT-2443 Add statistics for all indexes used in join cursor -WT-2447 Enhance join cursor implementation to avoid reading main table where possible -WT-2448 Add no_scale flag to relevant statistics -WT-2449 Enhance configure to check for a 64-bit build -WT-2454 Fix checkpoint_sync=false behavior to prevent flushes/sync to disk -WT-2456 Fix PPC CRC32 Code -WT-2457 Fix a bug where dropping an LSM table can fail with EBUSY when no user ops are active -WT-2459 Allow Configure scripts to provide the --tag option for libtool when compiling on PPC -WT-2460 Fix a bug where checkpoint could fail with WT_ROLLBACK -WT-2471 Update WiredTiger printf formats to be platform aware -WT-2476 Fix a race where btree->evict_lock is being accessed after being destroyed -WT-2481 Fix a recently 
introduced performance regression in LSM -WT-2483 Make read only testing more robust -WT-2485 Fix a test/format failure with floating point exception -WT-2492 Fix a bug in Windows where we used the different memory allocators accidentally -WT-2495 Missing memory initialization leads to crash on Windows -WT-2496 Fix a bug revealed by test/format unable to read root page -WT-2497 Enhance test/format to save a copy of backup -WT-2498 Fix a bug in LSM tree drop where it could hang when a user cursor is open -WT-2499 Fix a bug in LSM shutdown where a race condition causes a segfault -WT-2501 Fix a bug where dropping a just opened LSM tree isn't thread safe -WT-2502 Fix a memory leak in locking handles for checkpoint +* WT-1517 Fix error handling around schema format edge cases +* WT-2060 Simplify aggregation of statistics +* WT-2073 Metadata cleanups +* WT-2099 Seeing memory underflow messages +* WT-2107 Add example code including an event handler +* WT-2113 Truncate test occasionally fails with unexpected EBUSY +* WT-2123 Don't clear allocated memory if not required +* WT-2173 Fix some cases where tiny caches could get stuck full +* WT-2177 Add an optional per-thread seed to random number generator +* WT-2198 Bulk load and column store appends +* WT-2215 WT_LSN needs to support atomic reads and updates +* WT-2216 Simplify row-store search loop slightly +* WT-2231 Pinned page cursor searches could check parent keys +* WT-2235 Add a unicode option to WiredTiger printlog utility +* WT-2242 WiredTiger treats dead trees the same as other trees in eviction +* WT-2246 Improve performance for column-store append searches +* WT-2247 Variable-length column-store in-memory page splits +* WT-2258 Stop WiredTiger pre-loading pages when direct-IO is configured +* WT-2259 Fix error handling when getting exclusive access to a btree +* WT-2262 Fix random cursor next so it is not skewed by tree shape +* WT-2265 WiredTiger related change in PPC specific code block in gcc.h +* WT-2272 Fix 
a bug in the sweep server that triggered an assertion +* WT-2276 Add a tool to decode checkpoint addr +* WT-2277 Remove WT check against big-endian systems +* WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined +* WT-2280 Add CRC32 Optimized code for PPC +* WT-2282 Error in wt_txn_update_oldest verbose message test +* WT-2283 Retry in txn_update_oldest results in a hang +* WT-2285 Enhance configure to set BUFFER_ALIGNMENT_DEFAULT to 4kb on Linux +* WT-2289 Fix a bug in btree search when doing a fast key check +* WT-2291 Random cursor walk inefficient in skip list only trees +* WT-2295 WT_SESSION.create does a full-scan of the main table +* WT-2296 Improve log algorithm for sync/flush settings +* WT-2297 Fix off-by-one error in Huffman config file parsing +* WT-2299 Clean up layering violation between btree and block manager code +* WT-2307 Fix a bug where internal page splits can corrupt cursor iteration +* WT-2308 Add support for custom extractor for ref_cursors in join cursor +* WT-2311 Add support for UltraSparc platform +* WT-2312 Fix a bug where re-creating a deleted column-store page can corrupt the in-memory tree +* WT-2313 Fix a bug in the sweep server +* WT-2314 Update page-swap error handling so that it is consistent +* WT-2316 Fix a bug in WT_CURSOR.prev where it could return keys out-of-order +* WT-2318 Enhance condition wait implementation to use less CPU on idle databases +* WT-2321 Fix a race between eviction and worker threads on the eviction queue +* WT-2322 Fix a bug in read-uncommitted join cursors where using Bloom filters is unsafe +* WT-2328 Update schema drop to use the block manager interface for file removal +* WT-2331 Checking of search result for reference cursors before join +* WT-2332 Fix a bug in logging write-no-sync mode +* WT-2335 Fix a bug where parsing an invalid configuration string could segfault +* WT-2338 Disable using pre-allocated log files when a backup cursor is open +* WT-2339 Fix a bug in 
rebalance that caused database verification failure +* WT-2340 Add logging guarantee assertions +* WT-2345 Avoid creating tiny pages on disk when evicting small pages from cache +* WT-2346 Enhance checkpoint implementation so the schema lock is not held during I/O +* WT-2347 Fix some schema format edge cases in Java API +* WT-2352 Allow build and test without requiring lz4 +* WT-2355 Fix minor scratch buffer usage in logging +* WT-2356 log scan advances to next log file on partially written record +* WT-2368 Fix a bug where row-store can pass invalid keys to collator functions +* WT-2369 Use C compiler to detect headers instead of C++ compiler +* WT-2371 Fix a bug where parent split cannot access the page after page-index swap +* WT-2372 WiredTiger windows builder fails with C4005 against the "inline" macro +* WT-2375 Add tests for custom collators +* WT-2378 Fix a hang in LSM when doing forced drop with the no wait option +* WT-2381 Fix the dump utility so it does not discard the table configuration +* WT-2382 Fix a bug in join cursors with custom collator for 'u' format +* WT-2384 Fix a bug in join cursors where lt, le conditions for ordering could be wrong +* WT-2387 Fix cursor random unit test on Windows +* WT-2390 Fix the OS X build +* WT-2391 Enhance eviction so that it is less likely to evict pages from indexes +* WT-2394 Fix a bug in compact that meant we didn't always reclaim available space +* WT-2395 Fix a recovery failure with an LSM tree +* WT-2396 Fix a deadlock between table drop and checkpoint +* WT-2397 Fix a bug in cursor traversal where doing a reverse walk could skip records. 
+* WT-2399 Add test case that verifies cursor traversal +* WT-2409 Fix a minor performance regression in LSM +* WT-2410 Stop casting function pointers to different types +* WT-2411 Fix a hang in LSM related to dropping tables +* WT-2414 Avoid extractor calls for ordering cursor in join cursor +* WT-2417 Windows Jenkins task is failing +* WT-2418 Fix a bug in WT_SESSION.rebalance where it could return EBUSY +* WT-2420 Fix a bug in LSM where recovery from a backup could fail +* WT-2423 Fix a bug in session reference counting on error handling +* WT-2425 Fix a performance regression in wtperf evict-btree read workload +* WT-2426 Fix a deadlock caused by recent changes to checkpoint handle locking +* WT-2428 Make statistics logging compatible with MongoDB +* WT-2429 Add a statistic that tracks aggressive mode in eviction +* WT-2430 Add statistics for join cursor +* WT-2432 Fix a performance regression on LSM and read only workloads +* WT-2433 Allow read-only databases to log statistics +* WT-2434 Fix a race between force-drop and sweep +* WT-2436 Fix a bug in join cursors with lt, le conditions and "strategy=bloom" +* WT-2438 Extend WiredTiger stat declarations to help external tools +* WT-2440 Fix a bug in the PPC checksum implementation +* WT-2443 Add statistics for all indexes used in join cursor +* WT-2447 Enhance join cursor implementation to avoid reading main table where possible +* WT-2448 Add no_scale flag to relevant statistics +* WT-2449 Enhance configure to check for a 64-bit build +* WT-2454 Fix checkpoint_sync=false behavior to prevent flushes/sync to disk +* WT-2456 Fix PPC CRC32 Code +* WT-2457 Fix a bug where dropping an LSM table can fail with EBUSY when no user ops are active +* WT-2459 Allow Configure scripts to provide the --tag option for libtool when compiling on PPC +* WT-2460 Fix a bug where checkpoint could fail with WT_ROLLBACK +* WT-2471 Update WiredTiger printf formats to be platform aware +* WT-2476 Fix a race where btree->evict_lock is 
being accessed after being destroyed +* WT-2481 Fix a recently introduced performance regression in LSM +* WT-2483 Make read only testing more robust +* WT-2485 Fix a test/format failure with floating point exception +* WT-2492 Fix a bug in Windows where we used the different memory allocators accidentally +* WT-2495 Missing memory initialization leads to crash on Windows +* WT-2496 Fix a bug revealed by test/format unable to read root page +* WT-2497 Enhance test/format to save a copy of backup +* WT-2498 Fix a bug in LSM tree drop where it could hang when a user cursor is open +* WT-2499 Fix a bug in LSM shutdown where a race condition causes a segfault +* WT-2501 Fix a bug where dropping a just opened LSM tree isn't thread safe +* WT-2502 Fix a memory leak in locking handles for checkpoint WiredTiger release 2.7.0, 2015-12-08 ------------------------------------ @@ -162,221 +162,221 @@ fixes. New features and API changes; refer to the API documentation for full details: -WT-147 Create indexes on non-empty tables. -WT-1315 Add an implementation of cursor joins via a new WT_SESSION::join API. -WT-1350 Add a new configuration option to ::wiredtiger_open and - WT_CONNECTION::reconfigure called "eviction_dirty_trigger" that causes eviction to start - evicting dirty pages from cache once the given threshold has been reached. -WT-1728 Add a WT_SESSION::reset method to release resources held by a session. -WT-1930 Allow setting "file_manager=(close_idle_time=0)" to ::wiredtiger_open and - WT_CONNECTION::reconfigure to disable closing idle handles. -WT-1959 Change verify to distinguish between warnings and errors. Add a new strict mode - to verify that causes warnings to be reported as errors. Use strict mode to match earlier - behavior. See the upgrading documentation for more information. -WT-1980 Add a new "metadata:create" URI to WT_SESSION::open_cursor for metadata cursors - that return strings useful for passing to WT_SESSION::create. 
-WT-2065 Add a new configuration option to ::wiredtiger_open and - WT_CONNECTION::reconfigure called "shared_cache=(quota)" that limits the amount of shared - cache a participant can be assigned. -WT-2104 Add a method to flush log files via a new WT_SESSION::log_flush API. Made - WT_SESSION::commit_transaction configuration options match WT_SESSION::log_flush. Change - the default WT_SESSION::transaction_sync timeout to 20 minutes rather than infinity. -WT-2151 Enhance logging configuration to allow reconfiguration and add a new "log=(zero_fill)" - configuration option that causes WiredTiger to zero-fill log files on creation. -WT-2200 Add a new configuration option to ::wiredtiger_open called "write_through" that - causes WiredTiger to specify the FILE_FLAG_WRITE_THROUGH on Windows when writing files - (default false, including when "direct_io" is configured). -WT-2217 After a successful call to WT_CURSOR::insert, the key and value will be - cleared from the cursor. See the upgrading documentation for more information. -SERVER-17078 Add a "statistics=(size)" mode to statistics cursors, which allows for - retrieving file size only. -SERVER-18356 Changed the handling of the "config_base" option to ::wiredtiger_open. See - upgrading documentation for more information. +* WT-147 Create indexes on non-empty tables. +* WT-1315 Add an implementation of cursor joins via a new WT_SESSION::join API. +* WT-1350 Add a new configuration option to ::wiredtiger_open and + WT_CONNECTION::reconfigure called "eviction_dirty_trigger" that causes eviction to start + evicting dirty pages from cache once the given threshold has been reached. +* WT-1728 Add a WT_SESSION::reset method to release resources held by a session. +* WT-1930 Allow setting "file_manager=(close_idle_time=0)" to ::wiredtiger_open and + WT_CONNECTION::reconfigure to disable closing idle handles. +* WT-1959 Change verify to distinguish between warnings and errors. 
Add a new strict mode + to verify that causes warnings to be reported as errors. Use strict mode to match earlier + behavior. See the upgrading documentation for more information. +* WT-1980 Add a new "metadata:create" URI to WT_SESSION::open_cursor for metadata cursors + that return strings useful for passing to WT_SESSION::create. +* WT-2065 Add a new configuration option to ::wiredtiger_open and + WT_CONNECTION::reconfigure called "shared_cache=(quota)" that limits the amount of shared + cache a participant can be assigned. +* WT-2104 Add a method to flush log files via a new WT_SESSION::log_flush API. Made + WT_SESSION::commit_transaction configuration options match WT_SESSION::log_flush. Change + the default WT_SESSION::transaction_sync timeout to 20 minutes rather than infinity. +* WT-2151 Enhance logging configuration to allow reconfiguration and add a new "log=(zero_fill)" + configuration option that causes WiredTiger to zero-fill log files on creation. +* WT-2200 Add a new configuration option to ::wiredtiger_open called "write_through" that + causes WiredTiger to specify the FILE_FLAG_WRITE_THROUGH on Windows when writing files + (default false, including when "direct_io" is configured). +* WT-2217 After a successful call to WT_CURSOR::insert, the key and value will be + cleared from the cursor. See the upgrading documentation for more information. +* SERVER-17078 Add a "statistics=(size)" mode to statistics cursors, which allows for + retrieving file size only. +* SERVER-18356 Changed the handling of the "config_base" option to ::wiredtiger_open. See + upgrading documentation for more information. The following statistics were removed: -WT-1481 connection dhandles swept. -WT-1481 connection candidate referenced. -WT-1481 failed to find a slot large enough for record. -WT-1989 log buffer size increases. -WT-1989 slots selected for switching that were unavailable. -WT-2094 log records written directly. -WT-2094 record size exceeded maximum. 
-WT-2182 pages split during eviction. +* WT-1481 connection dhandles swept. +* WT-1481 connection candidate referenced. +* WT-1481 failed to find a slot large enough for record. +* WT-1989 log buffer size increases. +* WT-1989 slots selected for switching that were unavailable. +* WT-2094 log records written directly. +* WT-2094 record size exceeded maximum. +* WT-2182 pages split during eviction. Lookaside table: -WT-1967 Allow eviction of updates required by old readers. -WT-2074 Fix a race between lookaside table reconciliation and checkpoints. -WT-2149 Fix the order of creation of the lookaside table. -WT-2190 Fix transaction visibility test that is applied to the lookaside table. -SERVER-21585 Don't use the lookaside file until the cache is stuck full. +* WT-1967 Allow eviction of updates required by old readers. +* WT-2074 Fix a race between lookaside table reconciliation and checkpoints. +* WT-2149 Fix the order of creation of the lookaside table. +* WT-2190 Fix transaction visibility test that is applied to the lookaside table. +* SERVER-21585 Don't use the lookaside file until the cache is stuck full. Issues fixed in MongoDB: -SERVER-18829 Have pages start in the middle of the LRU queue for eviction. -SERVER-18838 During drops, don't remove files until the metadata is durable. -SERVER-18875 Clean up deleted pages. -SERVER-18899 Add unit test to simulate fsyncLock. -SERVER-19340 Avoid type aliasing in the random number generator. -SERVER-19445 Have the oldest transaction update the oldest tracked ID. -SERVER-19522 Try to evict internal pages with no useful child pages. -SERVER-19573 Change row-store inserts to avoid page locking. -SERVER-19751 Retry pthread_create on EAGAIN or EINTR. -SERVER-19954 Don't scan tracked handles during checkpoints. -SERVER-19989 Add a write barrier before data handles are added to shared lists. -SERVER-19990 Don't assert on eviction of live updates from dead trees. 
-SERVER-20008 Don't reset eviction walks when hitting a busy page. -SERVER-20159 Make all readers wait while the cache is full. -SERVER-20193 Fix obsolete transaction check. -SERVER-20303 Tune in-memory splits when inserting large objects. -SERVER-20385 Make WT_CURSOR::next(random) more random. -SERVER-21027 Reverse split if there are many deleted pages. -SERVER-21553 Enable fast-path truncate after splits. -SERVER-21619 Don't do internal page splits after a tree is marked DEAD. -SERVER-21691 Avoid insert stalls. +* SERVER-18829 Have pages start in the middle of the LRU queue for eviction. +* SERVER-18838 During drops, don't remove files until the metadata is durable. +* SERVER-18875 Clean up deleted pages. +* SERVER-18899 Add unit test to simulate fsyncLock. +* SERVER-19340 Avoid type aliasing in the random number generator. +* SERVER-19445 Have the oldest transaction update the oldest tracked ID. +* SERVER-19522 Try to evict internal pages with no useful child pages. +* SERVER-19573 Change row-store inserts to avoid page locking. +* SERVER-19751 Retry pthread_create on EAGAIN or EINTR. +* SERVER-19954 Don't scan tracked handles during checkpoints. +* SERVER-19989 Add a write barrier before data handles are added to shared lists. +* SERVER-19990 Don't assert on eviction of live updates from dead trees. +* SERVER-20008 Don't reset eviction walks when hitting a busy page. +* SERVER-20159 Make all readers wait while the cache is full. +* SERVER-20193 Fix obsolete transaction check. +* SERVER-20303 Tune in-memory splits when inserting large objects. +* SERVER-20385 Make WT_CURSOR::next(random) more random. +* SERVER-21027 Reverse split if there are many deleted pages. +* SERVER-21553 Enable fast-path truncate after splits. +* SERVER-21619 Don't do internal page splits after a tree is marked DEAD. +* SERVER-21691 Avoid insert stalls. Other note worthy changes since the previous release: -WT-1744 Throttle worker threads based on eviction targets. 
-WT-1845 Allow read only transactions to commit after failure. -WT-1869 Avoid doing in memory splits while checkpointing a tree. -WT-1942 Add atomic implementations for PPC64 architecture. -WT-1962 Make the hot_backup_lock a read/write lock. -WT-1963 Fix backup cursor Java API. -WT-1964 Fix a bug in the Java API when closing handles from a different thread. -WT-1966 Change how the shared cache assigns priority to participants. -WT-1975 Ensure previous log files are complete for forced sync. -WT-1977 Improve performance of getting snapshots with many sessions. -WT-1978 Better checking and tests for index cursor comparison. -WT-1981 Fix a signed 32-bit integer unpacking bug. -WT-1982 Fix a bug where cached overflow items were freed too early. -WT-1985 Integer packing and other fixes for Python and Java. -WT-1986 Fix a race renaming temporary log files. -WT-1989 Improve scalability of log writes. -WT-1996 Fix a bug where we would free the fist update during a page rewrite on error. -WT-1998 Fixes for indexes with some rarely used key/value formats. -WT-2002 Fix a bug in verify where it would panic when encountering a corrupted file. -WT-2007 Statically allocate log slot buffers to a maximum size. -WT-2008 Fix a bug in recovery where a file create went missing. -WT-2009 Apply tracked metadata operations post-commit. -WT-2012 Fix a bug updating the oldest ID. -WT-2013 Add gcc asm definitions for ARM64. -WT-2014 Fix a bug in checkpoints where files could be flushed in the wrong order. -WT-2015 Fix a bug in error handling during block open. -WT-2017 Once an eviction server thread is started keep it running. -WT-2019 Fix a logic bug tracking the maximum transaction ID in clean trees. -WT-2020 Clarify checksum error failure messages. -WT-2021 Fix a bug moving the oldest ID forward (introduced by WT-1967). -WT-2022 Fix a bug not releasing a handle when opening a non-existent index cursor. -WT-2023 Improve locking primitives: simplify read-write lock operations. 
-WT-2029 Improve scalability of statistics. -WT-2031 Log slot revamp. -WT-2032 Improve next_random cursors to work with small trees. -WT-2034 Improve shared cache balancing algorithm. -WT-2035 For index cursors, keep track of which column groups need to be positioned. -WT-2036 Make handle sweeps more robust. -WT-2037 Only write a checkpoint to the log on close if it wasn't. -WT-2038 Avoid long scans holding the handle list lock. -WT-2039 Add error check and unit test for log records over 4 GB. -WT-2042 Only try to evict tombstones that are visible to all readers. -WT-2045 Don't let the eviction server do slow reconciliation, it can stall eviction. -WT-2046 Add a statistic for search restarts. -WT-2047 Fix a bug in the random generator code to handle an uninitialized state. -WT-2050 Show size with memory allocation errors. -WT-2053 Fix a bug in disk verify messages. -WT-2056 Reorder btree cursor close so stats are maintained correctly. -WT-2057 Remove the verbose configuration when writing the base configuration file. -WT-2058 Fix an alignment bug in the mutex and log-slot code. -WT-2059 Include non-aggregated stats in cursor results. -WT-2062 Try harder to make progress on in-memory splits. -WT-2064 Don't spin indefinitely waiting for the handle list lock in eviction. -WT-2066 Update the oldest transaction ID from eviction. -WT-2068 Protect discarding handles with the handle list lock. -WT-2075 Fix a hang in logging with parallel workload. -WT-2078 Fix a bug in error handling with statistics cursors. -WT-2081 Make verify progress reporting less verbose. -WT-2085 Run some of the log_server threads operations more frequently. -WT-2086 Add a statistic to track when eviction finds a page that can be split. -WT-2089 Relax restrictions on multiblock eviction and in-memory splits. -WT-2090 Fix a bug in the Windows OS layer that swallowed error returns. -WT-2092 Free log condition variables after all threads are joined. 
-WT-2093 Use the C99 bool type to clarify when functions return true/false. -WT-2094 Eliminate direct write and record unbuffered log records. -WT-2097 Reintroduce immediate waits when forced eviction is necessary. -WT-2100 Rename evict to evict_queue so it's easier to search for. -WT-2101 Don't update the logging ckpt_lsn on clean shutdown. -WT-2102 Fix a hang in log slot join when forcing log writes. -WT-2105 Fix a bug where we could reference an invalid memory address if a file is corrupted on disk. -WT-2108 Rework in-memory page rewrite support (WT_PM_REC_REWRITE). -WT-2114 Make application eviction fairer. -WT-2115 Don't skip truncated pages that are part of a checkpoint. -WT-2116 Add diagnostic checks for stuck cache and dump the state. -WT-2119 Don't evict clean multiblock pages with overflow items during checkpoints. -WT-2126 Clean up if there is an error during splits. -WT-2127 Deepen the tree more regularly to avoid wide internal pages. -WT-2128 When decoding huffman encoding during salvage it's possible to have fewer bits than the - symbol length during decoding, if the value has been corrupted. -WT-2131 Switch to using a lock to control page splits to avoid starvation. -WT-2132 Make debug dump function more robust to errors. -WT-2134 Flush all buffered log records in log_flush. -WT-2135 Fix log_only setting for backup cursor. Fix initialization. -WT-2137 Check the sync_lsn is in the correct file before moving it forward. -WT-2139 Fix a transaction visibility bug in read-uncommitted transactions. -WT-2146 Improve performance when searching for short keys. -WT-2148 Fix a compiler warning in encoding functions. -WT-2153 Fix bug. Now we always need to start the log_server thread. -WT-2154 Make btree dump safer. -WT-2155 Remove last use of F_CAS_ATOMIC and the associated macro. -WT-2156 Allow eviction workers to restart. -WT-2157 Fix a bug where a failed page split could lead to incomplete checkpoints. -WT-2159 Don't check the config twice in one path. 
-WT-2162 Add null pointer check, needed after an index is dropped. -WT-2164 Prevent another LSM chunk checkpoint while the first is still in progress. -WT-2165 Stop using FALLOC_FL_KEEP_SIZE flag when pre-allocating files. -WT-2167 Switch recovery to using an internal session. -WT-2170 Protect the turtle file with a lock. -WT-2174 Avoid the table list lock when creating a size only statistics cursor. -WT-2178 In-memory storage engine support. -WT-2179 Added decorator to mark txn13 as part of the --long test suite. -WT-2180 Remove cursor.{search,search-near,remove} key size validation. -WT-2182 When internal pages grow large enough, split them into their parents. -WT-2184 Fix log scan bug when final record has many trailing zeros. -WT-2185 Don't do reverse splits when closing a file. -WT-2187 Add flag for flushing a slot. -WT-2189 Update flag set and clear macros to be less error prone. -WT-2191 In-memory disk image no longer the same as saved updates. -WT-2192 Fix the logic around checking whether internal page is evictable. -WT-2193 Handle read-committed metadata checkpoints during snapshot transactions. -WT-2194 Java close callbacks should handle cursors that Java code did not open. -WT-2195 Fix a hang after giving up on a reverse split. -WT-2196 Fix error handling in size only statistics. -WT-2199 Fix transaction sync inconsistency. -WT-2203 Release an allocated page on error. -WT-2204 Don't take a local copy of page->modify until we know the page is dirty. -WT-2206 Change cache operations from flags to an enumeration. -WT-2207 Track whenever a session has a handle exclusive. -WT-2210 Raw compression fails if row-store recovery precedes column-store recovery. -WT-2212 Add a "use_environment" config to ::wiredtiger_open. -WT-2218 Add truncate stats. -WT-2219 Enhancements to in-memory testing. -WT-2220 Update time comparison macros. -WT-2222 Add statistics for named snapshots. -WT-2224 Track which deleted refs are discarded by a split. 
-WT-2228 Avoid unnecessary raw-compression calls. -WT-2237 Have threads publish unique transaction IDs so that updates always become visible - immediately on commit. -WT-2241 Use a lock to protect transaction ID allocation. -WT-2243 Don't keep transaction IDs pinned for reading from checkpoints. -WT-2244 Trigger in-memory splits sooner. -WT-2248 WT_SESSION::close is updating WT_CONNECTION_IMPL.default_session. -WT-2249 Keep eviction stuck until cache usage is under 100%. -WT-2250 Minor fix. Use SET instead of increment for stat. -WT-2251 Free addresses when we discard deleted page references. -WT-2253 Evict pages left behind by in-memory splits. -WT-2257 Fixes when given multiple thread workload configurations. -WT-2260 Avoid adding internal pages to the eviction queue +* WT-1744 Throttle worker threads based on eviction targets. +* WT-1845 Allow read only transactions to commit after failure. +* WT-1869 Avoid doing in memory splits while checkpointing a tree. +* WT-1942 Add atomic implementations for PPC64 architecture. +* WT-1962 Make the hot_backup_lock a read/write lock. +* WT-1963 Fix backup cursor Java API. +* WT-1964 Fix a bug in the Java API when closing handles from a different thread. +* WT-1966 Change how the shared cache assigns priority to participants. +* WT-1975 Ensure previous log files are complete for forced sync. +* WT-1977 Improve performance of getting snapshots with many sessions. +* WT-1978 Better checking and tests for index cursor comparison. +* WT-1981 Fix a signed 32-bit integer unpacking bug. +* WT-1982 Fix a bug where cached overflow items were freed too early. +* WT-1985 Integer packing and other fixes for Python and Java. +* WT-1986 Fix a race renaming temporary log files. +* WT-1989 Improve scalability of log writes. +* WT-1996 Fix a bug where we would free the first update during a page rewrite on error. +* WT-1998 Fixes for indexes with some rarely used key/value formats.
+* WT-2002 Fix a bug in verify where it would panic when encountering a corrupted file. +* WT-2007 Statically allocate log slot buffers to a maximum size. +* WT-2008 Fix a bug in recovery where a file create went missing. +* WT-2009 Apply tracked metadata operations post-commit. +* WT-2012 Fix a bug updating the oldest ID. +* WT-2013 Add gcc asm definitions for ARM64. +* WT-2014 Fix a bug in checkpoints where files could be flushed in the wrong order. +* WT-2015 Fix a bug in error handling during block open. +* WT-2017 Once an eviction server thread is started keep it running. +* WT-2019 Fix a logic bug tracking the maximum transaction ID in clean trees. +* WT-2020 Clarify checksum error failure messages. +* WT-2021 Fix a bug moving the oldest ID forward (introduced by WT-1967). +* WT-2022 Fix a bug not releasing a handle when opening a non-existent index cursor. +* WT-2023 Improve locking primitives: simplify read-write lock operations. +* WT-2029 Improve scalability of statistics. +* WT-2031 Log slot revamp. +* WT-2032 Improve next_random cursors to work with small trees. +* WT-2034 Improve shared cache balancing algorithm. +* WT-2035 For index cursors, keep track of which column groups need to be positioned. +* WT-2036 Make handle sweeps more robust. +* WT-2037 Only write a checkpoint to the log on close if it wasn't. +* WT-2038 Avoid long scans holding the handle list lock. +* WT-2039 Add error check and unit test for log records over 4 GB. +* WT-2042 Only try to evict tombstones that are visible to all readers. +* WT-2045 Don't let the eviction server do slow reconciliation, it can stall eviction. +* WT-2046 Add a statistic for search restarts. +* WT-2047 Fix a bug in the random generator code to handle an uninitialized state. +* WT-2050 Show size with memory allocation errors. +* WT-2053 Fix a bug in disk verify messages. +* WT-2056 Reorder btree cursor close so stats are maintained correctly. 
+* WT-2057 Remove the verbose configuration when writing the base configuration file. +* WT-2058 Fix an alignment bug in the mutex and log-slot code. +* WT-2059 Include non-aggregated stats in cursor results. +* WT-2062 Try harder to make progress on in-memory splits. +* WT-2064 Don't spin indefinitely waiting for the handle list lock in eviction. +* WT-2066 Update the oldest transaction ID from eviction. +* WT-2068 Protect discarding handles with the handle list lock. +* WT-2075 Fix a hang in logging with parallel workload. +* WT-2078 Fix a bug in error handling with statistics cursors. +* WT-2081 Make verify progress reporting less verbose. +* WT-2085 Run some of the log_server threads operations more frequently. +* WT-2086 Add a statistic to track when eviction finds a page that can be split. +* WT-2089 Relax restrictions on multiblock eviction and in-memory splits. +* WT-2090 Fix a bug in the Windows OS layer that swallowed error returns. +* WT-2092 Free log condition variables after all threads are joined. +* WT-2093 Use the C99 bool type to clarify when functions return true/false. +* WT-2094 Eliminate direct write and record unbuffered log records. +* WT-2097 Reintroduce immediate waits when forced eviction is necessary. +* WT-2100 Rename evict to evict_queue so it's easier to search for. +* WT-2101 Don't update the logging ckpt_lsn on clean shutdown. +* WT-2102 Fix a hang in log slot join when forcing log writes. +* WT-2105 Fix a bug where we could reference an invalid memory address if a file is corrupted on disk. +* WT-2108 Rework in-memory page rewrite support (WT_PM_REC_REWRITE). +* WT-2114 Make application eviction fairer. +* WT-2115 Don't skip truncated pages that are part of a checkpoint. +* WT-2116 Add diagnostic checks for stuck cache and dump the state. +* WT-2119 Don't evict clean multiblock pages with overflow items during checkpoints. +* WT-2126 Clean up if there is an error during splits. 
+* WT-2127 Deepen the tree more regularly to avoid wide internal pages. +* WT-2128 When decoding huffman encoding during salvage it's possible to have fewer bits than the + symbol length during decoding, if the value has been corrupted. +* WT-2131 Switch to using a lock to control page splits to avoid starvation. +* WT-2132 Make debug dump function more robust to errors. +* WT-2134 Flush all buffered log records in log_flush. +* WT-2135 Fix log_only setting for backup cursor. Fix initialization. +* WT-2137 Check the sync_lsn is in the correct file before moving it forward. +* WT-2139 Fix a transaction visibility bug in read-uncommitted transactions. +* WT-2146 Improve performance when searching for short keys. +* WT-2148 Fix a compiler warning in encoding functions. +* WT-2153 Fix bug. Now we always need to start the log_server thread. +* WT-2154 Make btree dump safer. +* WT-2155 Remove last use of F_CAS_ATOMIC and the associated macro. +* WT-2156 Allow eviction workers to restart. +* WT-2157 Fix a bug where a failed page split could lead to incomplete checkpoints. +* WT-2159 Don't check the config twice in one path. +* WT-2162 Add null pointer check, needed after an index is dropped. +* WT-2164 Prevent another LSM chunk checkpoint while the first is still in progress. +* WT-2165 Stop using FALLOC_FL_KEEP_SIZE flag when pre-allocating files. +* WT-2167 Switch recovery to using an internal session. +* WT-2170 Protect the turtle file with a lock. +* WT-2174 Avoid the table list lock when creating a size only statistics cursor. +* WT-2178 In-memory storage engine support. +* WT-2179 Added decorator to mark txn13 as part of the --long test suite. +* WT-2180 Remove cursor.{search,search-near,remove} key size validation. +* WT-2182 When internal pages grow large enough, split them into their parents. +* WT-2184 Fix log scan bug when final record has many trailing zeros. +* WT-2185 Don't do reverse splits when closing a file. +* WT-2187 Add flag for flushing a slot. 
+* WT-2189 Update flag set and clear macros to be less error prone. +* WT-2191 In-memory disk image no longer the same as saved updates. +* WT-2192 Fix the logic around checking whether internal page is evictable. +* WT-2193 Handle read-committed metadata checkpoints during snapshot transactions. +* WT-2194 Java close callbacks should handle cursors that Java code did not open. +* WT-2195 Fix a hang after giving up on a reverse split. +* WT-2196 Fix error handling in size only statistics. +* WT-2199 Fix transaction sync inconsistency. +* WT-2203 Release an allocated page on error. +* WT-2204 Don't take a local copy of page->modify until we know the page is dirty. +* WT-2206 Change cache operations from flags to an enumeration. +* WT-2207 Track whenever a session has a handle exclusive. +* WT-2210 Raw compression fails if row-store recovery precedes column-store recovery. +* WT-2212 Add a "use_environment" config to ::wiredtiger_open. +* WT-2218 Add truncate stats. +* WT-2219 Enhancements to in-memory testing. +* WT-2220 Update time comparison macros. +* WT-2222 Add statistics for named snapshots. +* WT-2224 Track which deleted refs are discarded by a split. +* WT-2228 Avoid unnecessary raw-compression calls. +* WT-2237 Have threads publish unique transaction IDs so that updates always become visible + immediately on commit. +* WT-2241 Use a lock to protect transaction ID allocation. +* WT-2243 Don't keep transaction IDs pinned for reading from checkpoints. +* WT-2244 Trigger in-memory splits sooner. +* WT-2248 WT_SESSION::close is updating WT_CONNECTION_IMPL.default_session. +* WT-2249 Keep eviction stuck until cache usage is under 100%. +* WT-2250 Minor fix. Use SET instead of increment for stat. +* WT-2251 Free addresses when we discard deleted page references. +* WT-2253 Evict pages left behind by in-memory splits. +* WT-2257 Fixes when given multiple thread workload configurations. 
+* WT-2260 Avoid adding internal pages to the eviction queue WiredTiger release 2.6.1, 2015-05-13 ------------------------------------ @@ -392,7 +392,7 @@ New features: refs WT-1908 * Add the ability to flag a transaction to be flushed asynchronously on - commit via a new sync=[background] configuration option. Add a new + commit via a new sync=[background] configuration option. Add a new WT_SESSION::transaction_sync API to wait for asynchronous flushes to complete. refs WT-1908, #1943 @@ -1076,7 +1076,7 @@ New features and API changes: * Add new custom extractor functionality to WiredTiger indexes. Allowing an application to define mutated and/or multiple keys for indexes. [#1199] - + * Add a new WT_SESSION::transaction_pinned_range method that allows users to identify when a session is keeping a transaction ID pinned for a long time. [#1314] @@ -1573,7 +1573,7 @@ Significant changes include: [#756, #761] * WiredTiger statistics have been significantly improved: - + Statistics logging has been changed to aggregate information from all open handles. [#709, #717] @@ -1920,7 +1920,7 @@ below: * Make run-time statistics optional, defaulted to "off". * Change how we detect if shared cache is used. It used to rely on a name, - now it will be used if the shared_cache configuration option is included. + now it will be used if the shared_cache configuration option is included. * Add the ability to specify a per-connection reserved size for cache pools. Ensure cache pool reconfiguration is honoured quickly. @@ -2013,7 +2013,7 @@ changes are highlighted below: [392] Move examples/c/ex_test_perf.c to bench/wtperf. [322] Add support for statistics on schema-level objects i.e tables, - column groups, indices. + column groups, indices. * Enhance statistics, including changing the name of some statistics. @@ -2507,9 +2507,9 @@ upgrade. Here is the full list of changes: [#262] Disable dump on child cursors: only the top-level cursor is wrapped in a dump cursor. 
-[#266] Deal with new / dropped indices in __wt_schema_open_index. +[#266] Deal with new / dropped indices in __wt_schema_open_index. -[#269] Checkpoint handles must not be open when they are overwritten. +[#269] Checkpoint handles must not be open when they are overwritten. [#271] Add support for a reserved checkpoint name "WiredTigerCheckpoint" that opens the object's last checkpoint. @@ -2520,18 +2520,18 @@ upgrade. Here is the full list of changes: cursor equality result in a separate argument. [#275] If exclusive handle is required for an operation and it is not - available, fail immediately: don't block. + available, fail immediately: don't block. [#276] Fix methods that return integer parameters from Python. This - includes cursor.equals and cursor.search_near. + includes cursor.equals and cursor.search_near. [#277] Acquire the schema lock when creating the metadata file. We're single-threaded, so it isn't protecting against anything, but the - handle management code expects to have the schema lock. + handle management code expects to have the schema lock. [#279] Some optimizations for __wt_config_gets_defno. Specifically, if we're dealing with a simple stack of config strings, just parse the - application string rather than the full list of defaults. + application string rather than the full list of defaults. [#279] Split the description string into a set of structures, to reduce the number of string comparisons and manipulation that's required. @@ -2558,19 +2558,19 @@ upgrade. Here is the full list of changes: [#294] If txn_commit fails, document the transaction was rolled-back. [#295] Expand the documentation on using cursors without explicit - transactions. + transactions. [#300] Include all changes whenever closing a file, don't check for - visibility. If updates are skipped while evicting a page, give up. + visibility. If updates are skipped while evicting a page, give up. [#305] Have "wt dump" fail more gracefully if the object doesn't exist. 
[#310] When freeing a tracked address in reconciliation, clear it to avoid - freeing the same address again on error. + freeing the same address again on error. [#314] Replace cursor.equals with cursor.compare -[#319] Clear the bulk_load_ok flag when closing handles. +[#319] Clear the bulk_load_ok flag when closing handles. * Add an "ancient transaction" statistic so we can find out if they're @@ -2765,7 +2765,7 @@ Fix the "exclusive" config for WT_SESSION::create. [#181] Fix an eviction bug introduced into 1.1.2: when evicting a page with children, remove the children from the LRU eviction queue. Reduce the impact of clearing a page from the LRU queue by marking pages on the queue with a flag -(WT_PAGE_EVICT_LRU). +(WT_PAGE_EVICT_LRU). During an eviction walk, pin pages up to the root so there is no need to spin when attempting to lock a parent page. Use the EVICT_LRU page flag to avoid -- cgit v1.2.1 From d9a28c1c5b3bd3128f2c7b4265425c52c87165c0 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 24 Mar 2016 13:52:04 +1100 Subject: Change version to 2.8.0 in upgrading notes. --- src/docs/upgrading.dox | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index 5c5be214915..5e824fee977 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -1,6 +1,6 @@ /*! @page upgrading Upgrading WiredTiger applications -@section version_271 Upgrading to Version 2.7.1 +@section version_280 Upgrading to Version 2.8.0
LSM metadata
@@ -14,7 +14,7 @@ format will be upgraded automatically, but once updated to the new version
Historically, bulk-load of a column-store object ignored any key set in the cursor and automatically assigned each inserted row the next sequential -record number for its key. In the 2.7.1 release, column-store objects match +record number for its key. In the 2.8.0 release, column-store objects match row-store behavior and require the cursor key be set before an insert. (This allows sparse tables to be created in column-store objects, any skipped records are created as already-deleted rows.) To match the previous -- cgit v1.2.1 From dcc0d797ceae9f11f2854160e92a236074da31d1 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 24 Mar 2016 14:12:02 +1100 Subject: Review edits of the changelog --- NEWS | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/NEWS b/NEWS index d8c638e748f..9572b70e015 100644 --- a/NEWS +++ b/NEWS @@ -1,14 +1,14 @@ WiredTiger release 2.8.0, 2015-22-03 ------------------------------------ -The WiredTiger 2.8.0 release contains new features, minor API changes and bug -fixes. +The WiredTiger 2.8.0 release contains new features, new supported platforms, +minor API changes and bug fixes. 
New features and API changes; refer to the API documentation for full details: * WT-60 Port WiredTiger to run on big endian platforms * WT-2287 Add a new WT_SESSION.rebalance API -* WT-2333 Add a no_wait configuration setting to WT_SESSION.drop so it doesn't block +* WT-2333 Add a lock_wait configuration setting to WT_SESSION.drop to avoid blocking * WT-2349 Add a readonly configuration setting to wiredtiger_open * WT-2363 Remove built in support for bzip2 compression * WT-2404 Add streaming pack/unpack methods to the extension API @@ -17,12 +17,13 @@ Significant changes and bug fixes: * WT-1801 Add a directory sync after rollback of a WT_SESSION::rename operation * WT-2130 Improve on-disk page utilization with random workloads -* WT-2275 Fix a database corruption after application crash +* WT-2275 Fix a database corruption after truncate and crash * WT-2264 High update workloads can cause checkpoints to never complete -* WT-2290 Improve how effective WT_SESSION.compact is +* WT-2290 Improve effectiveness of WT_SESSION.compact * WT-2361 Fix a bug in column-store where verify identifies out of order data * WT-2367 Fix a bug in WT_CURSOR.next that could cause out-of-order key returns -* WT-2374 Fix a bug where a database was corrupted after a hard crash +* WT-2374 Fix a bug where a database was corrupted when restoring a backup +* WT-2381 Fix the dump utility to include the table configuration * WT-2451 Allow the WiredTiger metadata to be evicted * WT-2490 Fix a bug in column-store where search_near() returns the wrong key @@ -31,11 +32,11 @@ Issues fixed in MongoDB: * SERVER-21619 sys-perf: WT crash during core_workloads_WT execution * SERVER-21833 Enhance WT_SESSION::compact to more reliably release space * SERVER-21887 Enhance $sample to be faster on newly created collection -* SERVER-22676 Fix a bug in WiredTiger fails to open databases created by 3.0.0 or 3.0.1 -* SERVER-22773 CRC32 implementation on PPC was broken +* SERVER-22676 Allow WiredTiger to open 
databases created by 3.0.0 or 3.0.1 +* SERVER-22773 New CRC32 implementation on PowerPC * SERVER-22831 Low query rate with heavy cache pressure and an idle collection -Other note worthy changes since the previous release: +Other noteworthy changes since the previous release: * WT-1517 Fix error handling around schema format edge cases * WT-2060 Simplify aggregation of statistics @@ -101,7 +102,6 @@ Other note worthy changes since the previous release: * WT-2372 WiredTiger windows builder fails with C4005 against the "inline" macro * WT-2375 Add tests for custom collators * WT-2378 Fix a hang in LSM when doing forced drop with the no wait option -* WT-2381 Fix the dump utility so it does not discard the table configuration * WT-2382 Fix a bug in join cursors with custom collator for 'u' format * WT-2384 Fix a bug in join cursors where lt, le conditions for ordering could be wrong * WT-2387 Fix cursor random unit test on Windows @@ -138,7 +138,7 @@ Other note worthy changes since the previous release: * WT-2454 Fix checkpoint_sync=false behavior to prevent flushes/sync to disk * WT-2456 Fix PPC CRC32 Code * WT-2457 Fix a bug where dropping an LSM table can fail with EBUSY when no user ops are active -* WT-2459 Allow Configure scripts to provide the --tag option for libtool when compiling on PPC +* WT-2459 Allow configure scripts to provide the --tag option for libtool when compiling on PPC * WT-2460 Fix a bug where checkpoint could fail with WT_ROLLBACK * WT-2471 Update WiredTiger printf formats to be platform aware * WT-2476 Fix a race where btree->evict_lock is being accessed after being destroyed -- cgit v1.2.1 From 50f2361b4dc53b1c30d439a97113c3f6dc657ab1 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 24 Mar 2016 14:47:35 +1100 Subject: Switch PPC to PowerPC. 
--- NEWS | 14 +++++++------- dist/s_string.ok | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index 9572b70e015..87c6733bd97 100644 --- a/NEWS +++ b/NEWS @@ -58,12 +58,12 @@ Other noteworthy changes since the previous release: * WT-2258 Stop WiredTiger pre-loading pages when direct-IO is configured * WT-2259 Fix error handling when getting exclusive access to a btree * WT-2262 Fix random cursor next so it is not skewed by tree shape -* WT-2265 WiredTiger related change in PPC specific code block in gcc.h +* WT-2265 WiredTiger related change in PowerPC specific code block in gcc.h * WT-2272 Fix a bug in the sweep server that triggered an assertion * WT-2276 Add a tool to decode checkpoint addr * WT-2277 Remove WT check against big-endian systems * WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined -* WT-2280 Add CRC32 Optimized code for PPC +* WT-2280 Add CRC32 Optimized code for PowerPC * WT-2282 Error in wt_txn_update_oldest verbose message test * WT-2283 Retry in txn_update_oldest results in a hang * WT-2285 Enhance configure to set BUFFER_ALIGNMENT_DEFAULT to 4kb on Linux @@ -130,15 +130,15 @@ Other noteworthy changes since the previous release: * WT-2434 Fix a race between force-drop and sweep * WT-2436 Fix a bug in join cursors with lt, le conditions and "strategy=bloom" * WT-2438 Extend WiredTiger stat declarations to help external tools -* WT-2440 Fix a bug in the PPC checksum implementation +* WT-2440 Fix a bug in the PowerPC checksum implementation * WT-2443 Add statistics for all indexes used in join cursor * WT-2447 Enhance join cursor implementation to avoid reading main table where possible * WT-2448 Add no_scale flag to relevant statistics * WT-2449 Enhance configure to check for a 64-bit build * WT-2454 Fix checkpoint_sync=false behavior to prevent flushes/sync to disk -* WT-2456 Fix PPC CRC32 Code -* WT-2457 Fix a bug where dropping an LSM table can fail with EBUSY when no user ops are active -* 
WT-2459 Allow configure scripts to provide the --tag option for libtool when compiling on PPC +* WT-2456 Fix PowerPC CRC32 Code +* WT-2457 Fix a bug where dropping an LSM table can return EBUSY when no user ops are active +* WT-2459 Allow configure to use the --tag option for libtool when compiling on PowerPC * WT-2460 Fix a bug where checkpoint could fail with WT_ROLLBACK * WT-2471 Update WiredTiger printf formats to be platform aware * WT-2476 Fix a race where btree->evict_lock is being accessed after being destroyed @@ -242,7 +242,7 @@ Other note worthy changes since the previous release: * WT-1744 Throttle worker threads based on eviction targets. * WT-1845 Allow read only transactions to commit after failure. * WT-1869 Avoid doing in memory splits while checkpointing a tree. -* WT-1942 Add atomic implementations for PPC64 architecture. +* WT-1942 Add atomic implementations for PowerPC architecture. * WT-1962 Make the hot_backup_lock a read/write lock. * WT-1963 Fix backup cursor Java API. * WT-1964 Fix a bug in the Java API when closing handles from a different thread. 
diff --git a/dist/s_string.ok b/dist/s_string.ok index 0ec07b45e8d..79f662aa851 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -242,6 +242,7 @@ Pandis Phong PlatformSDK Posix +PowerPC Pre Preload Prepend -- cgit v1.2.1 From 45ed4d3856f3ce3191c81679900f7d890e92132a Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 24 Mar 2016 15:19:29 +1100 Subject: Fixup dates for release versions --- NEWS | 2 +- README | 2 +- build_posix/aclocal/version-set.m4 | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 87c6733bd97..af8b15488cc 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -WiredTiger release 2.8.0, 2015-22-03 +WiredTiger release 2.8.0, 2016-03-24 ------------------------------------ The WiredTiger 2.8.0 release contains new features, new supported platforms, diff --git a/README b/README index bdb9003c22d..fc0fe79afee 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -WiredTiger 2.8.0: (March 23, 2016) +WiredTiger 2.8.0: (March 24, 2016) This is version 2.8.0 of WiredTiger.
diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4 index 746c73499df..bcc2c99c727 100644 --- a/build_posix/aclocal/version-set.m4 +++ b/build_posix/aclocal/version-set.m4 @@ -3,7 +3,7 @@ dnl build by dist/s_version VERSION_MAJOR=2 VERSION_MINOR=8 VERSION_PATCH=0 -VERSION_STRING='"WiredTiger 2.8.0: (March 23, 2016)"' +VERSION_STRING='"WiredTiger 2.8.0: (March 24, 2016)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) -- cgit v1.2.1 From 04a99e3b68664e6201bcb062b6816fd6a71ef040 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 24 Mar 2016 15:46:29 +1100 Subject: Bump release version on develop to 2.8.1 --- README | 6 +++--- RELEASE_INFO | 2 +- build_posix/aclocal/version-set.m4 | 4 ++-- build_posix/aclocal/version.m4 | 2 +- dist/package/wiredtiger.spec | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README b/README index fc0fe79afee..32549d099d5 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ -WiredTiger 2.8.0: (March 24, 2016) +WiredTiger 2.8.1: (March 24, 2016) -This is version 2.8.0 of WiredTiger. +This is version 2.8.1 of WiredTiger. 
WiredTiger release packages and documentation can be found at: @@ -8,7 +8,7 @@ WiredTiger release packages and documentation can be found at: The documentation for this specific release can be found at: - http://source.wiredtiger.com/2.8.0/index.html + http://source.wiredtiger.com/2.8.1/index.html The WiredTiger source code can be found at: diff --git a/RELEASE_INFO b/RELEASE_INFO index 7d6f8a982b4..d29c29b554c 100644 --- a/RELEASE_INFO +++ b/RELEASE_INFO @@ -1,6 +1,6 @@ WIREDTIGER_VERSION_MAJOR=2 WIREDTIGER_VERSION_MINOR=8 -WIREDTIGER_VERSION_PATCH=0 +WIREDTIGER_VERSION_PATCH=1 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"` diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4 index bcc2c99c727..997f571782c 100644 --- a/build_posix/aclocal/version-set.m4 +++ b/build_posix/aclocal/version-set.m4 @@ -2,8 +2,8 @@ dnl build by dist/s_version VERSION_MAJOR=2 VERSION_MINOR=8 -VERSION_PATCH=0 -VERSION_STRING='"WiredTiger 2.8.0: (March 24, 2016)"' +VERSION_PATCH=1 +VERSION_STRING='"WiredTiger 2.8.1: (March 24, 2016)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) diff --git a/build_posix/aclocal/version.m4 b/build_posix/aclocal/version.m4 index 9c12f14ad48..0e199cb9546 100644 --- a/build_posix/aclocal/version.m4 +++ b/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. 
Maintained by dist/s_version -2.8.0 +2.8.1 diff --git a/dist/package/wiredtiger.spec b/dist/package/wiredtiger.spec index a9eff97794c..365c330d86a 100644 --- a/dist/package/wiredtiger.spec +++ b/dist/package/wiredtiger.spec @@ -1,5 +1,5 @@ Name: wiredtiger -Version: 2.8.0 +Version: 2.8.1 Release: 1%{?dist} Summary: WiredTiger data storage engine -- cgit v1.2.1 From 5f5ccea1371f1781d0f3eebee0e876e6884d5506 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 24 Mar 2016 16:29:22 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Convert the Windows sources to the new structure. Create a new function __wt_win32_errno(), which returns the value from GetLastError(); review all Win32 calls to check which should return __wt_win32_errno() and which should return __wt_errno(). Fix a bug where a NULL pointer could be printed in the POSIX and Windows directory-list functions. Rework the changes in __wt_eventv(): I added code using a WT_SESSION handle in the code path that is specifically not supposed to use a WT_SESSION handle. For now, hard-code calls using stdio and stderr. Change the utility function's Windows mkdir command to ignore ENOENT.
--- build_win/filelist.win | 6 +- dist/s_prototypes | 7 +- dist/s_string.ok | 3 + dist/s_win | 16 +- src/include/extern.h | 30 ++ src/os_posix/os_dir.c | 4 +- src/os_posix/os_init.c | 14 +- src/os_posix/os_open.c | 5 +- src/os_posix/os_posix.c | 12 +- src/os_win/os_dir.c | 19 +- src/os_win/os_dlopen.c | 8 +- src/os_win/os_errno.c | 27 +- src/os_win/os_exist.c | 33 --- src/os_win/os_fallocate.c | 45 --- src/os_win/os_filesize.c | 64 ----- src/os_win/os_flock.c | 47 ---- src/os_win/os_fsync.c | 71 ----- src/os_win/os_ftruncate.c | 37 --- src/os_win/os_getenv.c | 2 +- src/os_win/os_map.c | 8 +- src/os_win/os_mtx_cond.c | 2 +- src/os_win/os_open.c | 266 ------------------ src/os_win/os_remove.c | 71 ----- src/os_win/os_rename.c | 53 ---- src/os_win/os_rw.c | 102 ------- src/os_win/os_sleep.c | 6 +- src/os_win/os_thread.c | 11 +- src/os_win/os_win.c | 677 ++++++++++++++++++++++++++++++++++++++++++++++ src/support/err.c | 16 +- test/utility/test_util.i | 2 +- 30 files changed, 814 insertions(+), 850 deletions(-) delete mode 100644 src/os_win/os_exist.c delete mode 100644 src/os_win/os_fallocate.c delete mode 100644 src/os_win/os_filesize.c delete mode 100644 src/os_win/os_flock.c delete mode 100644 src/os_win/os_fsync.c delete mode 100644 src/os_win/os_ftruncate.c delete mode 100644 src/os_win/os_open.c delete mode 100644 src/os_win/os_remove.c delete mode 100644 src/os_win/os_rename.c delete mode 100644 src/os_win/os_rw.c create mode 100644 src/os_win/os_win.c diff --git a/build_win/filelist.win b/build_win/filelist.win index 3d6b0a9193f..3e6bcf7f204 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -103,22 +103,21 @@ src/meta/meta_track.c src/meta/meta_turtle.c src/os_posix/os_abort.c src/os_posix/os_alloc.c +src/os_posix/os_fallocate.c src/os_posix/os_getline.c src/os_posix/os_getopt.c src/os_posix/os_init.c src/os_posix/os_inmemory.c -src/os_posix/os_posix.c +src/os_posix/os_open.c src/os_posix/os_stdio.c src/os_posix/os_strtouq.c 
src/os_win/os_dir.c src/os_win/os_dlopen.c src/os_win/os_errno.c -src/os_win/os_fallocate.c src/os_win/os_getenv.c src/os_win/os_map.c src/os_win/os_mtx_cond.c src/os_win/os_once.c -src/os_win/os_open.c src/os_win/os_pagesize.c src/os_win/os_path.c src/os_win/os_priv.c @@ -127,6 +126,7 @@ src/os_win/os_snprintf.c src/os_win/os_thread.c src/os_win/os_time.c src/os_win/os_vsnprintf.c +src/os_win/os_win.c src/os_win/os_yield.c src/packing/pack_api.c src/packing/pack_impl.c diff --git a/dist/s_prototypes b/dist/s_prototypes index 603c0f5633d..311f5826dc2 100755 --- a/dist/s_prototypes +++ b/dist/s_prototypes @@ -10,7 +10,10 @@ cat < $t f=../src/include/extern.h diff --git a/dist/s_string.ok b/dist/s_string.ok index 7da47d796ca..b07e8dc311d 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -335,6 +335,7 @@ WAL WIREDTIGER WRLSN WRNOLOCK +WaitForSingleObject WakeAllConditionVariable Wconditional WeakHashLen @@ -596,6 +597,7 @@ fblocks fclose fcntl fdatasync +fdopen ffc fflush ffs @@ -843,6 +845,7 @@ optimizations optype ori os +osfhandle ovfl ownp packv diff --git a/dist/s_win b/dist/s_win index 0b7d5184037..6127146dc70 100755 --- a/dist/s_win +++ b/dist/s_win @@ -48,34 +48,26 @@ win_filelist() (sed \ -e 's;os_posix/os_dir.c;os_win/os_dir.c;' \ -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \ - -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \ - -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \ -e 's;os_posix/os_errno.c;os_win/os_errno.c;' \ -e 's;os_posix/os_exist.c;os_win/os_exist.c;' \ - -e 's;os_posix/os_fallocate.c;os_win/os_fallocate.c;' \ - -e 's;os_posix/os_filesize.c;os_win/os_filesize.c;' \ - -e 's;os_posix/os_flock.c;os_win/os_flock.c;' \ - -e 's;os_posix/os_fsync.c;os_win/os_fsync.c;' \ - -e 's;os_posix/os_ftruncate.c;os_win/os_ftruncate.c;' \ -e 's;os_posix/os_getenv.c;os_win/os_getenv.c;' \ -e 's;os_posix/os_map.c;os_win/os_map.c;' \ -e 's;os_posix/os_mtx_cond.c;os_win/os_mtx_cond.c;' \ -e 's;os_posix/os_once.c;os_win/os_once.c;' \ - -e 
's;os_posix/os_open.c;os_win/os_open.c;' \ -e 's;os_posix/os_pagesize.c;os_win/os_pagesize.c;' \ -e 's;os_posix/os_path.c;os_win/os_path.c;' \ -e 's;os_posix/os_priv.c;os_win/os_priv.c;' \ - -e 's;os_posix/os_remove.c;os_win/os_remove.c;' \ - -e 's;os_posix/os_rename.c;os_win/os_rename.c;' \ - -e 's;os_posix/os_rw.c;os_win/os_rw.c;' \ -e 's;os_posix/os_sleep.c;os_win/os_sleep.c;' \ -e 's;os_posix/os_thread.c;os_win/os_thread.c;' \ -e 's;os_posix/os_time.c;os_win/os_time.c;' \ -e 's;os_posix/os_yield.c;os_win/os_yield.c;' \ + -e '/src\/os_posix\/os_posix.c/d' \ -e '/src\/support\/power8\/crc32.S/d' \ -e '/src\/support\/power8\/crc32_wrapper.c/d' echo 'src/os_win/os_snprintf.c' - echo 'src/os_win/os_vsnprintf.c') < filelist | sort > $t + echo 'src/os_win/os_vsnprintf.c' + echo 'src/os_win/os_win.c') < filelist | sort > $t + cmp $t $f > /dev/null 2>&1 || (echo "Building $f" && rm -f $f && cp $t $f) } diff --git a/src/include/extern.h b/src/include/extern.h index 068b7544414..7a767dcebf2 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -769,3 +769,33 @@ extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops); extern int __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_recover(WT_SESSION_IMPL *session); +extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); +extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); +extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); +extern int __wt_map_error_rdonly(int winerr); +extern int __wt_errno(void); +extern int __wt_win32_errno(void); +extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, 
char *errbuf, size_t errlen); +extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); +extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void**mappingcookie); +extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); +extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); +extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void**mappingcookie); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); +extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); +extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); +extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); +extern int __wt_once(void(*init_routine)(void)); +extern int __wt_get_vm_pagesize(void); +extern bool __wt_absolute_path(const char *path); +extern const char *__wt_path_separator(void); +extern bool __wt_has_priv(void); +extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); +extern void __wt_thread_id(char*buf, size_t buflen); +extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); +extern int __wt_os_win(WT_SESSION_IMPL *session); +extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session); +extern void __wt_yield(void); diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c index a8a8e7d7aaa..49ee1e0f84a 100644 --- a/src/os_posix/os_dir.c +++ b/src/os_posix/os_dir.c @@ -94,5 +94,7 @@ err: __wt_free(session, entries[count]); __wt_free(session, entries); } - WT_RET_MSG(session, ret, "dirlist %s prefix %s", dir, prefix); + WT_RET_MSG(session, ret, + "directory-list %s, prefix 
\"%s\"", + dir, prefix == NULL ? "" : prefix); } diff --git a/src/os_posix/os_init.c b/src/os_posix/os_init.c index acba622b366..512216c52a5 100644 --- a/src/os_posix/os_init.c +++ b/src/os_posix/os_init.c @@ -16,7 +16,12 @@ int __wt_os_init(WT_SESSION_IMPL *session) { return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ? - __wt_os_inmemory(session) : __wt_os_posix(session)); + __wt_os_inmemory(session) : +#if defined(_MSC_VER) + __wt_os_win(session)); +#else + __wt_os_posix(session)); +#endif } /* @@ -27,5 +32,10 @@ int __wt_os_cleanup(WT_SESSION_IMPL *session) { return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ? - __wt_os_inmemory_cleanup(session) : __wt_os_posix_cleanup(session)); + __wt_os_inmemory_cleanup(session) : +#if defined(_MSC_VER) + __wt_os_win_cleanup(session)); +#else + __wt_os_posix_cleanup(session)); +#endif } diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 4cd41114b9f..b98532afc02 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -137,8 +137,9 @@ __wt_open(WT_SESSION_IMPL *session, WT_ERR(conn->handle_open(session, fh, name, dio_type, flags)); open_called = true; - /* Set the file's size. */ - WT_ERR(fh->fh_size(session, fh, &fh->size)); + /* Set file sizes. */ + if (dio_type != WT_FILE_TYPE_DIRECTORY) + WT_ERR(fh->fh_size(session, fh, &fh->size)); /* * Repeat the check for a match: if there's no match, link our newly diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index a12dd437fe7..059ac1cf835 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -145,7 +145,7 @@ __posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) /* * __posix_file_remove -- - * POSIX remove. + * Remove a file. */ static int __posix_file_remove(WT_SESSION_IMPL *session, const char *name) @@ -172,7 +172,7 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name) /* * __posix_file_rename -- - * POSIX rename. + * Rename a file. 
*/ static int __posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) @@ -469,7 +469,7 @@ __posix_handle_write(WT_SESSION_IMPL *session, /* * __posix_handle_open -- - * POSIX fopen/open. + * Open a file handle. */ static int __posix_handle_open(WT_SESSION_IMPL *session, @@ -485,12 +485,12 @@ __posix_handle_open(WT_SESSION_IMPL *session, conn = S2C(session); direct_io = false; + path = NULL; /* 0 is a legal file descriptor, set up error handling. */ fh->fd = fd = -1; /* Create the path to the file. */ - path = NULL; if (!LF_ISSET(WT_OPEN_FIXED)) { WT_ERR(__wt_filename(session, name, &path)); name = path; @@ -606,12 +606,12 @@ setupfh: break; } if (stream_mode != NULL && (fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), "%s: fopen", name); + WT_ERR_MSG(session, __wt_errno(), "%s: fdopen", name); __wt_free(session, path); fh->fd = fd; - /* Configure fallocate/posix_fallocate calls. */ + /* Configure fallocate calls. */ __wt_posix_handle_allocate_configure(session, fh); fh->fh_advise = __posix_handle_advise; diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c index 00ec4f252e4..98ddc820850 100644 --- a/src/os_win/os_dir.c +++ b/src/os_win/os_dir.c @@ -9,13 +9,12 @@ #include "wt_internal.h" /* - * __wt_dirlist -- - * Get a list of files from a directory, optionally filtered by - * a given prefix. + * __wt_win_directory_list -- + * Get a list of files from a directory, MSVC version. 
*/ int -__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, - uint32_t flags, char ***dirlist, u_int *countp) +__wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, + const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) { HANDLE findhandle; WIN32_FIND_DATA finddata; @@ -56,7 +55,7 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, findhandle = FindFirstFileA(pathbuf->data, &finddata); if (INVALID_HANDLE_VALUE == findhandle) - WT_ERR_MSG(session, __wt_errno(), "%s: FindFirstFile", + WT_ERR_MSG(session, __wt_win32_errno(), "%s: FindFirstFile", pathbuf->data); else { do { @@ -67,6 +66,10 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, strcmp(finddata.cFileName, "..") == 0) continue; match = false; + + /* + * The list of files is optionally filtered by a prefix. + */ if (prefix != NULL && ((LF_ISSET(WT_DIRLIST_INCLUDE) && WT_PREFIX_MATCH(finddata.cFileName, prefix)) || @@ -108,5 +111,7 @@ err: __wt_free(session, entries); } - WT_RET_MSG(session, ret, "dirlist %s prefix %s", dir, prefix); + WT_RET_MSG(session, ret, + "directory-list %s, prefix \"%s\"", + dir, prefix == NULL ? 
"" : prefix); } diff --git a/src/os_win/os_dlopen.c b/src/os_win/os_dlopen.c index 0bad39d681d..0c0d70ec1c9 100644 --- a/src/os_win/os_dlopen.c +++ b/src/os_win/os_dlopen.c @@ -25,8 +25,8 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) if (path == NULL) { ret = GetModuleHandleExA(0, NULL, (HMODULE *)&dlh->handle); if (ret == FALSE) - WT_ERR_MSG(session, - __wt_errno(), "GetModuleHandleEx(%s): %s", path, 0); + WT_ERR_MSG(session, __wt_win32_errno(), + "GetModuleHandleEx(%s): %s", path, 0); } else { // TODO: load dll here DebugBreak(); @@ -57,7 +57,7 @@ __wt_dlsym(WT_SESSION_IMPL *session, sym = GetProcAddress(dlh->handle, name); if (sym == NULL && fail) { - WT_RET_MSG(session, __wt_errno(), + WT_RET_MSG(session, __wt_win32_errno(), "GetProcAddress(%s in %s)", name, dlh->name); } @@ -75,7 +75,7 @@ __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_DECL_RET; if ((ret = FreeLibrary(dlh->handle)) == FALSE) { - __wt_err(session, __wt_errno(), "FreeLibrary"); + __wt_err(session, __wt_win32_errno(), "FreeLibrary"); } /* Windows returns 0 on failure, WT expects 0 on success */ diff --git a/src/os_win/os_errno.c b/src/os_win/os_errno.c index 590fcdc9d44..1968dadd856 100644 --- a/src/os_win/os_errno.c +++ b/src/os_win/os_errno.c @@ -61,16 +61,35 @@ __wt_map_error_rdonly(int winerr) */ int __wt_errno(void) +{ + /* + * Check for 0: + * It's easy to introduce a problem by calling the wrong error function, + * for example, this function when the Win32 function set the value + * returned by GetLastError. Handle gracefully and always return an error. + */ + return (errno == 0 ? WT_ERROR : errno); +} + +/* + * __wt_win32_errno -- + * Return GetLastError, or WT_ERROR if error not set. + */ +int +__wt_win32_errno(void) { /* * Called when we know an error occurred, and we want the system - * error code, but there's some chance it's not set. + * error code.
*/ DWORD err = GetLastError(); - /* GetLastError should only be called if we hit an actual error */ - WT_ASSERT(NULL, err != ERROR_SUCCESS); - + /* + * Check for ERROR_SUCCESS: + * It's easy to introduce a problem by calling the wrong error function, + * for example, this function when the MSVC function set the C runtime + * error value. Handle gracefully and always return an error. + */ return (err == ERROR_SUCCESS ? WT_ERROR : __wt_map_windows_error_to_error(err)); } diff --git a/src/os_win/os_exist.c b/src/os_win/os_exist.c deleted file mode 100644 index ec1369cc727..00000000000 --- a/src/os_win/os_exist.c +++ /dev/null @@ -1,33 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_exist -- - * Return if the file exists. - */ -int -__wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp) -{ - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, filename, &path)); - - ret = GetFileAttributesA(path); - - __wt_free(session, path); - - if (ret != INVALID_FILE_ATTRIBUTES) - *existp = true; - else - *existp = false; - - return (0); -} diff --git a/src/os_win/os_fallocate.c b/src/os_win/os_fallocate.c deleted file mode 100644 index a324687ca73..00000000000 --- a/src/os_win/os_fallocate.c +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_fallocate_config -- - * Configure fallocate behavior for a file handle. - */ -void -__wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_UNUSED(session); - - /* - * fallocate on Windows would be implemented using SetEndOfFile, which - * can also truncate the file. 
WiredTiger expects fallocate to ignore - * requests to truncate the file which Windows does not do, so we don't - * support the call. - */ - fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; - fh->fallocate_requires_locking = false; -} - -/* - * __wt_fallocate -- - * Allocate space for a file handle. - */ -int -__wt_fallocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - - return (ENOTSUP); -} diff --git a/src/os_win/os_filesize.c b/src/os_win/os_filesize.c deleted file mode 100644 index c9925fb18a8..00000000000 --- a/src/os_win/os_filesize.c +++ /dev/null @@ -1,64 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_filesize -- - * Get the size of a file in bytes. - */ -int -__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - LARGE_INTEGER size; - WT_DECL_RET; - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: GetFileSizeEx", fh->name)); - - if ((ret = GetFileSizeEx(fh->filehandle, &size)) != 0) { - *sizep = size.QuadPart; - return (0); - } - - WT_RET_MSG(session, __wt_errno(), "%s: GetFileSizeEx", fh->name); -} - -/* - * __wt_filesize_name -- - * Return the size of a file in bytes, given a file name. 
- */ -int -__wt_filesize_name(WT_SESSION_IMPL *session, - const char *filename, bool silent, wt_off_t *sizep) -{ - WIN32_FILE_ATTRIBUTE_DATA data; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, filename, &path)); - - ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); - - __wt_free(session, path); - - if (ret != 0) { - *sizep = - ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; - return (0); - } - - /* - * Some callers of this function expect failure if the file doesn't - * exist, and don't want an error message logged. - */ - ret = __wt_errno(); - if (!silent) - WT_RET_MSG(session, ret, "%s: GetFileAttributesEx", filename); - return (ret); -} diff --git a/src/os_win/os_flock.c b/src/os_win/os_flock.c deleted file mode 100644 index 60a981499a5..00000000000 --- a/src/os_win/os_flock.c +++ /dev/null @@ -1,47 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_bytelock -- - * Lock/unlock a byte in a file. - */ -int -__wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock) -{ - WT_DECL_RET; - - /* - * WiredTiger requires this function be able to acquire locks past - * the end of file. - * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - * - * http://msdn.microsoft.com/ - * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx - * - * You can lock bytes that are beyond the end of the current file. - * This is useful to coordinate adding records to the end of a file. 
- */ - if (lock) { - ret = LockFile(fhp->filehandle, UINT32_MAX & byte, - UINT32_MAX & (byte >> 32), 1, 0); - } else { - ret = UnlockFile(fhp->filehandle, UINT32_MAX & byte, - UINT32_MAX & (byte >> 32), 1, 0); - } - - if (ret == FALSE) - WT_RET_MSG(NULL, __wt_errno(), "%s: LockFile", fhp->name); - - return (0); -} diff --git a/src/os_win/os_fsync.c b/src/os_win/os_fsync.c deleted file mode 100644 index c196fc6c06a..00000000000 --- a/src/os_win/os_fsync.c +++ /dev/null @@ -1,71 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_directory_sync_fh -- - * Flush a directory file handle. - */ -int -__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_UNUSED(session); - WT_UNUSED(fh); - return (0); -} - -/* - * __wt_directory_sync -- - * Flush a directory to ensure a file creation is durable. - */ -int -__wt_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -} - -/* - * __wt_fsync -- - * Flush a file handle. - */ -int -__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: FlushFileBuffers", - fh->name)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) - WT_RET_MSG(session, - __wt_errno(), "%s FlushFileBuffers error", fh->name); - - return (0); -} - -/* - * __wt_fsync_async -- - * Flush a file handle and don't wait for the result. 
- */ -int -__wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_UNUSED(session); - WT_UNUSED(fh); - - return (0); -} diff --git a/src/os_win/os_ftruncate.c b/src/os_win/os_ftruncate.c deleted file mode 100644 index 88fcf9542c1..00000000000 --- a/src/os_win/os_ftruncate.c +++ /dev/null @@ -1,37 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_ftruncate -- - * Truncate a file. - */ -int -__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - LARGE_INTEGER largeint; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - largeint.QuadPart = len; - - if ((ret = SetFilePointerEx( - fh->filehandle_secondary, largeint, NULL, FILE_BEGIN)) == FALSE) - WT_RET_MSG(session, __wt_errno(), "%s SetFilePointerEx error", - fh->name); - - ret = SetEndOfFile(fh->filehandle_secondary); - if (ret != FALSE) - return (0); - - if (GetLastError() == ERROR_USER_MAPPED_FILE) - return (EBUSY); - - WT_RET_MSG(session, __wt_errno(), "%s SetEndOfFile error", fh->name); -} diff --git a/src/os_win/os_getenv.c b/src/os_win/os_getenv.c index c9084769cd5..cb095381942 100644 --- a/src/os_win/os_getenv.c +++ b/src/os_win/os_getenv.c @@ -29,7 +29,7 @@ __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) ret = GetEnvironmentVariableA(variable, *envp, size); /* We expect the number of bytes not including nul terminator. 
*/ if ((ret + 1) != size) - WT_RET_MSG(session, __wt_errno(), + WT_RET_MSG(session, __wt_win32_errno(), "GetEnvironmentVariableA failed: %s", variable); return (0); diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index dc040b4fa54..96146705fe4 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -30,7 +30,7 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, *mappingcookie = CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); if (*mappingcookie == NULL) - WT_RET_MSG(session, __wt_errno(), + WT_RET_MSG(session, __wt_win32_errno(), "%s CreateFileMapping error: failed to map %" WT_SIZET_FMT " bytes", fh->name, orig_size); @@ -40,7 +40,7 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, CloseHandle(*mappingcookie); *mappingcookie = NULL; - WT_RET_MSG(session, __wt_errno(), + WT_RET_MSG(session, __wt_win32_errno(), "%s map error: failed to map %" WT_SIZET_FMT " bytes", fh->name, orig_size); } @@ -93,14 +93,14 @@ __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, fh->name, map, len)); if (UnmapViewOfFile(map) == 0) { - WT_RET_MSG(session, __wt_errno(), + WT_RET_MSG(session, __wt_win32_errno(), "%s UnmapViewOfFile error: failed to unmap %" WT_SIZET_FMT " bytes", fh->name, len); } if (CloseHandle(*mappingcookie) == 0) { - WT_RET_MSG(session, __wt_errno(), + WT_RET_MSG(session, __wt_win32_errno(), "CloseHandle: MapViewOfFile: %s", fh->name); } diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c index 14bac2a99d9..28fd24e68b0 100644 --- a/src/os_win/os_mtx_cond.c +++ b/src/os_win/os_mtx_cond.c @@ -103,7 +103,7 @@ __wt_cond_wait_signal( if ((err = GetLastError()) == ERROR_TIMEOUT) *signalled = false; else - ret = __wt_errno(); + ret = __wt_win32_errno(); } else ret = 0; diff --git a/src/os_win/os_open.c b/src/os_win/os_open.c deleted file mode 100644 index f10582c5bd1..00000000000 --- a/src/os_win/os_open.c +++ /dev/null @@ -1,266 +0,0 @@ -/*- - * Copyright (c) 
2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_open -- - * Open a file handle. - */ -int -__wt_open(WT_SESSION_IMPL *session, - const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp) -{ - DWORD dwCreationDisposition; - HANDLE filehandle, filehandle_secondary; - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh, *tfh; - uint64_t bucket, hash; - int f, share_mode; - bool direct_io, matched; - char *path; - - conn = S2C(session); - fh = NULL; - path = NULL; - filehandle = INVALID_HANDLE_VALUE; - filehandle_secondary = INVALID_HANDLE_VALUE; - direct_io = false; - hash = __wt_hash_city64(name, strlen(name)); - bucket = hash % WT_HASH_ARRAY_SIZE; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name)); - - /* Increment the reference count if we already have the file open. */ - matched = false; - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) - if (strcmp(name, tfh->name) == 0) { - ++tfh->ref; - *fhp = tfh; - matched = true; - break; - } - __wt_spin_unlock(session, &conn->fh_lock); - if (matched) - return (0); - - /* For directories, create empty file handles with invalid handles */ - if (dio_type == WT_FILE_TYPE_DIRECTORY) { - goto setupfh; - } - - WT_RET(__wt_filename(session, name, &path)); - - /* - * If this is a read-only connection, open all files read-only - * except the lock file. - */ - if (F_ISSET(conn, WT_CONN_READONLY) && - !WT_STRING_MATCH(name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))) - share_mode = FILE_SHARE_READ; - else - share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE; - - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. 
- * - * TODO: Set tighter file permissions but set bInheritHandle to false - * to prevent inheritance - */ - - f = FILE_ATTRIBUTE_NORMAL; - - dwCreationDisposition = 0; - if (ok_create) { - WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || - WT_STRING_MATCH(name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - dwCreationDisposition = CREATE_NEW; - if (exclusive) - dwCreationDisposition = CREATE_ALWAYS; - } else - dwCreationDisposition = OPEN_EXISTING; - - /* - * direct_io means no OS file caching. This requires aligned buffer - * allocations like O_DIRECT. - */ - if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) { - f |= FILE_FLAG_NO_BUFFERING; - direct_io = true; - } - - /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ - if (dio_type && FLD_ISSET(conn->write_through, dio_type)) { - f |= FILE_FLAG_WRITE_THROUGH; - } - - if (dio_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { - f |= FILE_FLAG_WRITE_THROUGH; - } - - /* Disable read-ahead on trees: it slows down random read workloads. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - f |= FILE_FLAG_RANDOM_ACCESS; - - filehandle = CreateFileA(path, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - dwCreationDisposition, - f, - NULL); - if (filehandle == INVALID_HANDLE_VALUE) { - if (GetLastError() == ERROR_FILE_EXISTS && ok_create) - filehandle = CreateFileA(path, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - OPEN_EXISTING, - f, - NULL); - - if (filehandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_errno(), - direct_io ? - "%s: open failed with direct I/O configured, some " - "filesystem types do not support direct I/O" : - "%s", path); - } - - /* - * Open a second handle to file to support allocation/truncation - * concurrently with reads on the file. Writes would also move the file - * pointer. 
- */ - filehandle_secondary = CreateFileA(path, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - OPEN_EXISTING, - f, - NULL); - if (filehandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_errno(), - "open failed for secondary handle: %s", path); - -setupfh: - WT_ERR(__wt_calloc_one(session, &fh)); - WT_ERR(__wt_strdup(session, name, &fh->name)); - fh->name_hash = hash; - fh->filehandle = filehandle; - fh->filehandle_secondary = filehandle_secondary; - fh->ref = 1; - fh->direct_io = direct_io; - - /* Set the file's size. */ - if (dio_type != WT_FILE_TYPE_DIRECTORY) - WT_ERR(__wt_filesize(session, fh, &fh->size)); - - /* Configure file extension. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - fh->extend_len = conn->data_extend_len; - - /* Configure fallocate/posix_fallocate calls. */ - __wt_fallocate_config(session, fh); - - /* - * Repeat the check for a match, but then link onto the database's list - * of files. - */ - matched = false; - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) - if (strcmp(name, tfh->name) == 0) { - ++tfh->ref; - *fhp = tfh; - matched = true; - break; - } - if (!matched) { - WT_CONN_FILE_INSERT(conn, fh, bucket); - (void)__wt_atomic_add32(&conn->open_file_count, 1); - - *fhp = fh; - } - __wt_spin_unlock(session, &conn->fh_lock); - if (matched) { -err: if (fh != NULL) { - __wt_free(session, fh->name); - __wt_free(session, fh); - } - if (filehandle != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle); - if (filehandle_secondary != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle_secondary); - } - - __wt_free(session, path); - return (ret); -} - -/* - * __wt_close -- - * Close a file handle. 
- */ -int -__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh; - uint64_t bucket; - - conn = S2C(session); - - if (*fhp == NULL) - return (0); - fh = *fhp; - *fhp = NULL; - - __wt_spin_lock(session, &conn->fh_lock); - if (fh == NULL || fh->ref == 0 || --fh->ref > 0) { - __wt_spin_unlock(session, &conn->fh_lock); - return (0); - } - - /* Remove from the list. */ - bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; - WT_CONN_FILE_REMOVE(conn, fh, bucket); - (void)__wt_atomic_sub32(&conn->open_file_count, 1); - - __wt_spin_unlock(session, &conn->fh_lock); - - /* Discard the memory. - * Note: For directories, we do not open valid directory handles on - * windows since it is not possible to sync a directory - */ - if (fh->filehandle != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle) == 0) { - ret = __wt_errno(); - __wt_err(session, ret, "CloseHandle: %s", fh->name); - } - - if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle_secondary) == 0) { - ret = __wt_errno(); - __wt_err(session, ret, "CloseHandle: secondary: %s", fh->name); - } - - __wt_free(session, fh->name); - __wt_free(session, fh); - return (ret); -} diff --git a/src/os_win/os_remove.c b/src/os_win/os_remove.c deleted file mode 100644 index 84f1dd86674..00000000000 --- a/src/os_win/os_remove.c +++ /dev/null @@ -1,71 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __remove_file_check -- - * Check if the file is currently open before removing it. 
- */ -static inline void -__remove_file_check(WT_SESSION_IMPL *session, const char *name) -{ -#ifdef HAVE_DIAGNOSTIC - WT_CONNECTION_IMPL *conn; - WT_FH *fh; - uint64_t bucket; - - conn = S2C(session); - WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY)); - fh = NULL; - bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; - - /* - * Check if the file is open: it's an error if it is, since a higher - * level should have closed it before removing. - */ - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(name, fh->name) == 0) - break; - __wt_spin_unlock(session, &conn->fh_lock); - - WT_ASSERT(session, fh == NULL); -#else - WT_UNUSED(session); - WT_UNUSED(name); -#endif -} - -/* - * __wt_remove -- - * Remove a file. - */ -int -__wt_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - char *path; - uint32_t lasterror; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: remove", name)); - - __remove_file_check(session, name); - - WT_RET(__wt_filename(session, name, &path)); - - if ((ret = DeleteFileA(path)) == FALSE) - lasterror = __wt_errno(); - - __wt_free(session, path); - - if (ret != FALSE) - return (0); - - WT_RET_MSG(session, lasterror, "%s: remove", name); -} diff --git a/src/os_win/os_rename.c b/src/os_win/os_rename.c deleted file mode 100644 index b4be2dba24c..00000000000 --- a/src/os_win/os_rename.c +++ /dev/null @@ -1,53 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_rename -- - * Rename a file. 
- */ -int -__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_DECL_RET; - uint32_t lasterror; - char *from_path, *to_path; - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "rename %s to %s", from, to)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - from_path = to_path = NULL; - - WT_RET(__wt_filename(session, from, &from_path)); - WT_TRET(__wt_filename(session, to, &to_path)); - - /* - * Check if file exists since Windows does not override the file if - * it exists. - */ - if ((ret = GetFileAttributesA(to_path)) != INVALID_FILE_ATTRIBUTES) { - if ((ret = DeleteFileA(to_path)) == FALSE) { - lasterror = __wt_errno(); - goto err; - } - } - - if ((MoveFileA(from_path, to_path)) == FALSE) - lasterror = __wt_errno(); - -err: - __wt_free(session, from_path); - __wt_free(session, to_path); - - if (ret != FALSE) - return (0); - - WT_RET_MSG(session, lasterror, "MoveFile %s to %s", from, to); -} diff --git a/src/os_win/os_rw.c b/src/os_win/os_rw.c deleted file mode 100644 index a9537a648f9..00000000000 --- a/src/os_win/os_rw.c +++ /dev/null @@ -1,102 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_read -- - * Read a chunk. - */ -int -__wt_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - DWORD chunk; - DWORD nr; - uint8_t *addr; - OVERLAPPED overlapped = { 0 }; - - nr = 0; - - WT_STAT_FAST_CONN_INCR(session, read_io); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - /* Assert direct I/O is aligned and a multiple of the alignment. 
*/ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); - overlapped.Offset = UINT32_MAX & offset; - overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - - if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) - WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(), - "%s read error: failed to read %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __wt_write -- - * Write a chunk. - */ -int -__wt_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - DWORD chunk; - DWORD nw; - const uint8_t *addr; - OVERLAPPED overlapped = { 0 }; - - nw = 0; - - WT_STAT_FAST_CONN_INCR(session, write_io); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. 
*/ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); - overlapped.Offset = UINT32_MAX & offset; - overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - - if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) - WT_RET_MSG(session, __wt_errno(), - "%s write error: failed to write %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} diff --git a/src/os_win/os_sleep.c b/src/os_win/os_sleep.c index 1d4b316488a..1cb61f7c4aa 100644 --- a/src/os_win/os_sleep.c +++ b/src/os_win/os_sleep.c @@ -15,11 +15,15 @@ void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) { + DWORD dwMilliseconds; + /* * If the caller wants a small pause, set to our * smallest granularity. */ if (seconds == 0 && micro_seconds < WT_THOUSAND) micro_seconds = WT_THOUSAND; - Sleep(seconds * WT_THOUSAND + micro_seconds / WT_THOUSAND); + dwMilliseconds = (DWORD) + (seconds * WT_THOUSAND + micro_seconds / WT_THOUSAND); + Sleep(dwMilliseconds); } diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c index 3be0ccb9393..e2394731df1 100644 --- a/src/os_win/os_thread.c +++ b/src/os_win/os_thread.c @@ -21,7 +21,7 @@ __wt_thread_create(WT_SESSION_IMPL *session, if (*tidret != 0) return (0); - WT_RET_MSG(session, errno, "_beginthreadex"); + WT_RET_MSG(session, __wt_errno, "thread create: _beginthreadex"); } /* @@ -37,12 +37,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) /* * If we fail to wait, we will leak handles so do not continue */ - WT_PANIC_RET(session, ret == WAIT_FAILED ? __wt_errno() : ret, - "Wait for thread join failed"); + WT_PANIC_RET(session, + ret == WAIT_FAILED ? 
__wt_win32_errno() : ret, + "thread join: WaitForSingleObject"); if (CloseHandle(tid) == 0) { - WT_RET_MSG(session, __wt_errno(), - "CloseHandle: thread join"); + WT_RET_MSG(session, + __wt_win32_errno(), "thread join: CloseHandle"); } return (0); diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c new file mode 100644 index 00000000000..6959aeaf917 --- /dev/null +++ b/src/os_win/os_win.c @@ -0,0 +1,677 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __win_directory_sync -- + * Flush a directory to ensure a file creation is durable. + */ +static int +__win_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +} + +/* + * __win_file_exist -- + * Return if the file exists. + */ +static int +__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + + ret = GetFileAttributesA(path); + + __wt_free(session, path); + + if (ret != INVALID_FILE_ATTRIBUTES) + *existp = true; + else + *existp = false; + + return (0); +} + +/* + * __win_file_remove -- + * Remove a file. + */ +static int +__win_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + char *path; + uint32_t lasterror; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, name, false, true, NULL, NULL)) + WT_RET_MSG( + session, EINVAL, "%s: remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + + if ((ret = DeleteFileA(path)) == FALSE) + lasterror = __wt_win32_errno(); + + __wt_free(session, path); + + if (ret != FALSE) + return (0); + + WT_RET_MSG(session, lasterror, "%s: remove", name); +} + +/* + * __win_file_rename -- + * Rename a file. 
+ */ +static int +__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + uint32_t lasterror; + char *from_path, *to_path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, from, false, true, NULL, NULL)) + WT_RET_MSG( + session, EINVAL, "%s: rename: file has open handles", from); + if (__wt_handle_search(session, to, false, true, NULL, NULL)) + WT_RET_MSG( + session, EINVAL, "%s: rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + from = from_path; + WT_ERR(__wt_filename(session, to, &to_path)); + to = to_path; + + /* + * Check if file exists since Windows does not override the file if + * it exists. + */ + if ((ret = GetFileAttributesA(to)) != INVALID_FILE_ATTRIBUTES) { + if ((ret = DeleteFileA(to)) == FALSE) { + lasterror = __wt_win32_errno(); + goto err; + } + } + + if ((MoveFileA(from, to)) == FALSE) + lasterror = __wt_win32_errno(); + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + + if (ret != FALSE) + return (0); + + WT_RET_MSG(session, lasterror, "MoveFile %s to %s", from, to); +} + +/* + * __win_file_size -- + * Get the size of a file in bytes, by file name. + */ +static int +__win_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + WIN32_FILE_ATTRIBUTE_DATA data; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + + ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); + + __wt_free(session, path); + + if (ret != 0) { + *sizep = + ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; + return (0); + } + + /* + * Some callers of this function expect failure if the file doesn't + * exist, and don't want an error message logged. + */ + ret = __wt_win32_errno(); + if (!silent) + WT_RET_MSG(session, ret, "%s: GetFileAttributesEx", name); + return (ret); +} + +/* + * __win_handle_advise -- + * MSVC fadvise. 
+ */ +static int +__win_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + WT_RET_MSG(session, ENOTSUP, "%s: advise", fh->name); +} + +/* + * __win_handle_allocate_configure -- + * Configure fallocate behavior for a file handle. + */ +static void +__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_UNUSED(session); + + /* + * fallocate on Windows would be implemented using SetEndOfFile, which + * can also truncate the file. WiredTiger expects fallocate to ignore + * requests to truncate the file which Windows does not do, so we don't + * support the call. + */ + fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; + fh->fallocate_requires_locking = false; +} + +/* + * __win_handle_allocate -- + * Allocate space for a file handle. + */ +static int +__win_handle_allocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + + return (ENOTSUP); +} + +/* + * __win_handle_close -- + * Close a file handle. + */ +static int +__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_DECL_RET; + + /* + * Note: For directories, we do not open valid directory handles on + * windows since it is not possible to sync a directory + */ + if (fh->filehandle != INVALID_HANDLE_VALUE && + CloseHandle(fh->filehandle) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, "%s: CloseHandle", fh->name); + } + + if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && + CloseHandle(fh->filehandle_secondary) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, "%s: CloseHandle: secondary", fh->name); + } + return (ret); +} + +/* + * __win_handle_getc -- + * ANSI C fgetc. 
+ */ +static int +__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, + ENOTSUP, "%s: getc: no stream configured", fh->name); + + *chp = fgetc(fh->fp); + if (*chp != EOF || !ferror(fh->fp)) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: getc", fh->name); +} + +/* + * __win_handle_lock -- + * Lock/unlock a file. + */ +static int +__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + WT_DECL_RET; + + /* + * WiredTiger requires this function be able to acquire locks past + * the end of file. + * + * Note we're using fcntl(2) locking: all fcntl locks associated with a + * file for a given process are removed when any file descriptor for the + * file is closed by the process, even if a lock was never requested for + * that file descriptor. + * + * http://msdn.microsoft.com/ + * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx + * + * You can lock bytes that are beyond the end of the current file. + * This is useful to coordinate adding records to the end of a file. + */ + ret = lock ? + LockFile(fh->filehandle, 0, 0, 1, 0) : + UnlockFile(fh->filehandle, 0, 0, 1, 0); + + if (ret == FALSE) + WT_RET_MSG(NULL, __wt_win32_errno(), "%s: LockFile", fh->name); + + return (0); +} + +/* + * __win_handle_printf -- + * ANSI C vfprintf. + */ +static int +__win_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, ENOTSUP, + "%s: vfprintf: no stream configured", fh->name); + + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: vfprintf", fh->name); +} + +/* + * __win_handle_read -- + * Read a chunk. + */ +static int +__win_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + DWORD chunk, nr; + uint8_t *addr; + OVERLAPPED overlapped = { 0 }; + + nr = 0; + + /* Assert direct I/O is aligned and a multiple of the alignment. 
*/ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); + overlapped.Offset = UINT32_MAX & offset; + overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); + + if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) + WT_RET_MSG(session, + nr == 0 ? WT_ERROR : __wt_win32_errno(), + "%s read error: failed to read %" WT_SIZET_FMT + " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __win_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + LARGE_INTEGER size; + WT_DECL_RET; + + if ((ret = GetFileSizeEx(fh->filehandle, &size)) != 0) { + *sizep = size.QuadPart; + return (0); + } + + WT_RET_MSG(session, __wt_win32_errno(), "%s: GetFileSizeEx", fh->name); +} + +/* + * __win_handle_sync -- + * MSVC fflush/fsync. + */ +static int +__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + WT_DECL_RET; + + WT_UNUSED(block); + + if (fh->fp == NULL) { + if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) + WT_RET_MSG(session, __wt_win32_errno(), + "%s FlushFileBuffers error", fh->name); + return (0); + } + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: fflush", fh->name); +} + +/* + * __win_handle_truncate -- + * Truncate a file. 
+ */ +static int +__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + LARGE_INTEGER largeint; + + largeint.QuadPart = len; + + if ((ret = SetFilePointerEx( + fh->filehandle_secondary, largeint, NULL, FILE_BEGIN)) == FALSE) + WT_RET_MSG(session, + __wt_win32_errno(), "%s SetFilePointerEx error", + fh->name); + + ret = SetEndOfFile(fh->filehandle_secondary); + if (ret != FALSE) + return (0); + + if (GetLastError() == ERROR_USER_MAPPED_FILE) + return (EBUSY); + + WT_RET_MSG(session, + __wt_win32_errno(), "%s SetEndOfFile error", fh->name); +} + +/* + * __win_handle_write -- + * Write a chunk. + */ +static int +__win_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + DWORD chunk; + DWORD nw; + const uint8_t *addr; + OVERLAPPED overlapped = { 0 }; + + nw = 0; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break writes larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { + chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); + overlapped.Offset = UINT32_MAX & offset; + overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); + + if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) + WT_RET_MSG(session, __wt_win32_errno(), + "%s write error: failed to write %" WT_SIZET_FMT + " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __win_handle_open -- + * Open a file handle. 
+ */ +static int +__win_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *name, int dio_type, uint32_t flags) +{ + DWORD dwCreationDisposition; + HANDLE filehandle, filehandle_secondary; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + int f, fd, share_mode; + bool direct_io; + char *path; + const char *stream_mode; + + conn = S2C(session); + direct_io = false; + path = NULL; + + filehandle = filehandle_secondary = INVALID_HANDLE_VALUE; + + /* + * Opening a file handle on a directory is only to support filesystems + * that require a directory sync for durability, and Windows doesn't + * require that, functionality: create empty file handles with invalid + * handles. + */ + if (dio_type == WT_FILE_TYPE_DIRECTORY) + goto directory_open; + + /* Create the path to the file. */ + if (!LF_ISSET(WT_OPEN_FIXED)) { + WT_ERR(__wt_filename(session, name, &path)); + name = path; + } + + share_mode = FILE_SHARE_READ; + if (!LF_ISSET(WT_OPEN_READONLY)) + share_mode |= FILE_SHARE_WRITE; + + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. + * + * TODO: Set tighter file permissions but set bInheritHandle to false + * to prevent inheritance + */ + f = FILE_ATTRIBUTE_NORMAL; + + dwCreationDisposition = 0; + if (LF_ISSET(WT_OPEN_CREATE)) { + dwCreationDisposition = CREATE_NEW; + if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + dwCreationDisposition = CREATE_ALWAYS; + } else + dwCreationDisposition = OPEN_EXISTING; + + /* + * direct_io means no OS file caching. This requires aligned buffer + * allocations like O_DIRECT. 
+ */ + if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) { + f |= FILE_FLAG_NO_BUFFERING; + direct_io = true; + } + fh->direct_io = direct_io; + + /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ + if (dio_type && FLD_ISSET(conn->write_through, dio_type)) { + f |= FILE_FLAG_WRITE_THROUGH; + } + + if (dio_type == WT_FILE_TYPE_LOG && + FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { + f |= FILE_FLAG_WRITE_THROUGH; + } + + /* Disable read-ahead on trees: it slows down random read workloads. */ + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) + f |= FILE_FLAG_RANDOM_ACCESS; + + /* Configure file extension. */ + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) + fh->extend_len = conn->data_extend_len; + + filehandle = CreateFileA(name, + (GENERIC_READ | GENERIC_WRITE), + share_mode, + NULL, + dwCreationDisposition, + f, + NULL); + if (filehandle == INVALID_HANDLE_VALUE) { + if (LF_ISSET(WT_OPEN_CREATE) && + GetLastError() == ERROR_FILE_EXISTS) + filehandle = CreateFileA(name, + (GENERIC_READ | GENERIC_WRITE), + share_mode, + NULL, + OPEN_EXISTING, + f, + NULL); + + if (filehandle == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_win32_errno(), + direct_io ? + "%s: open failed with direct I/O configured, some " + "filesystem types do not support direct I/O" : + "%s", name); + } + + /* + * Open a second handle to file to support allocation/truncation + * concurrently with reads on the file. Writes would also move the file + * pointer. + */ + filehandle_secondary = CreateFileA(name, + (GENERIC_READ | GENERIC_WRITE), + share_mode, + NULL, + OPEN_EXISTING, + f, + NULL); + if (filehandle_secondary == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_win32_errno(), + "open failed for secondary handle: %s", name); + + /* Optionally configure the stream API. 
*/ + switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { + case WT_STREAM_APPEND: + f = _O_APPEND | _O_TEXT; + stream_mode = "a"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case WT_STREAM_READ: + f = _O_RDONLY | _O_TEXT; + stream_mode = "r"; + break; + case WT_STREAM_WRITE: + f = _O_TEXT; + stream_mode = "w"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case 0: + default: + stream_mode = NULL; + break; + } + if (stream_mode != NULL) { + if ((fd = _open_osfhandle((intptr_t)filehandle, f)) == -1) + WT_ERR_MSG( + session, __wt_errno(), "%s: _open_osfhandle", name); + if ((fh->fp = fdopen(fd, stream_mode)) == NULL) + WT_ERR_MSG(session, __wt_errno(), "%s: fdopen", name); + } + + /* Configure fallocate/posix_fallocate calls. */ + __win_handle_allocate_configure(session, fh); + +directory_open: + __wt_free(session, path); + fh->filehandle = filehandle; + fh->filehandle_secondary = filehandle_secondary; + + fh->fh_advise = __win_handle_advise; + fh->fh_allocate = __win_handle_allocate; + fh->fh_close = __win_handle_close; + fh->fh_getc = __win_handle_getc; + fh->fh_lock = __win_handle_lock; + fh->fh_printf = __win_handle_printf; + fh->fh_read = __win_handle_read; + fh->fh_size = __win_handle_size; + fh->fh_sync = __win_handle_sync; + fh->fh_truncate = __win_handle_truncate; + fh->fh_write = __win_handle_write; + + return (0); + +err: if (filehandle != INVALID_HANDLE_VALUE) + (void)CloseHandle(filehandle); + if (filehandle_secondary != INVALID_HANDLE_VALUE) + (void)CloseHandle(filehandle_secondary); + + __wt_free(session, path); + return (ret); +} + +/* + * __wt_os_win -- + * Initialize a MSVC configuration. + */ +int +__wt_os_win(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + /* Initialize the Windows jump table.
*/ + conn->file_directory_list = __wt_win_directory_list; + conn->file_directory_sync = __win_directory_sync; + conn->file_exist = __win_file_exist; + conn->file_remove = __win_file_remove; + conn->file_rename = __win_file_rename; + conn->file_size = __win_file_size; + conn->handle_open = __win_handle_open; + + return (0); +} + +/* + * __wt_os_win_cleanup -- + * Discard a MSVC configuration. + */ +int +__wt_os_win_cleanup(WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (0); +} diff --git a/src/support/err.c b/src/support/err.c index f1f9c0c16e9..f64492f1561 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -180,13 +180,19 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * example, we can end up here without a session.) */ if (session == NULL) { - WT_RET(__wt_fprintf(session, WT_STDERR(session), + if (fprintf(stderr, "WiredTiger Error%s%s: ", error == 0 ? "" : ": ", - error == 0 ? "" : __wt_strerror(session, error, NULL, 0))); - WT_RET(__wt_vfprintf(session, WT_STDERR(session), fmt, ap)); - WT_RET(__wt_fprintf(session, WT_STDERR(session), "\n")); - return (__wt_fsync(session, WT_STDERR(session), true)); + error == 0 ?
"" : + __wt_strerror(session, error, NULL, 0)) < 0) + ret = EIO; + if (vfprintf(stderr, fmt, ap) < 0) + ret = EIO; + if (fprintf(stderr, "\n") < 0) + ret = EIO; + if (fflush(stderr) != 0) + ret = EIO; + return (ret); } p = s; diff --git a/test/utility/test_util.i b/test/utility/test_util.i index f9b889a7610..43982d9e4a1 100644 --- a/test/utility/test_util.i +++ b/test/utility/test_util.i @@ -132,7 +132,7 @@ testutil_clean_work_dir(char *dir) snprintf(buf, len, "%s%s", RM_COMMAND, dir); - if ((ret = system(buf)) != 0) + if ((ret = system(buf)) != 0 && ret != ENOENT) testutil_die(ret, "%s", buf); free(buf); } -- cgit v1.2.1 From 5ee14696cc19d1a484e7258f1130aa08b68f7f1c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 24 Mar 2016 18:09:14 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Sort the OS directory prototypes to avoid duplicates. --- dist/s_prototypes | 44 +++++++++++++------ src/include/extern.h | 118 +++++++++++++++++++-------------------------------- 2 files changed, 75 insertions(+), 87 deletions(-) diff --git a/dist/s_prototypes b/dist/s_prototypes index 311f5826dc2..8620932e688 100755 --- a/dist/s_prototypes +++ b/dist/s_prototypes @@ -4,16 +4,10 @@ t=__wt.$$ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 -( -cat < $t + -e 's/$/;/p' < $1 +} + +( +cat < $t f=../src/include/extern.h cmp $t $f > /dev/null 2>&1 || diff --git a/src/include/extern.h b/src/include/extern.h index 7a767dcebf2..06dbd891cac 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -484,58 +484,6 @@ extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session); extern int __wt_turtle_init(WT_SESSION_IMPL *session); extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep); extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); -extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); -extern int __wt_calloc(WT_SESSION_IMPL *session, size_t 
number, size_t size, void *retp); -extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); -extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); -extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); -extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); -extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); -extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); -extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); -extern int __wt_errno(void); -extern int __wt_map_error_rdonly(int error); -extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); -extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); -extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); -extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); -extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); -extern int __wt_os_init(WT_SESSION_IMPL *session); -extern int __wt_os_cleanup(WT_SESSION_IMPL *session); -extern int __wt_os_inmemory(WT_SESSION_IMPL *session); -extern int __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session); -extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void 
*mapp, size_t *lenp, void **mappingcookie); -extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); -extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); -extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); -extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); -extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); -extern int __wt_once(void (*init_routine)(void)); -extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp); -extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, int dio_type, u_int flags, WT_FH **fhp); -extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); -extern int __wt_close_connection_close(WT_SESSION_IMPL *session); -extern int __wt_get_vm_pagesize(void); -extern bool __wt_absolute_path(const char *path); -extern const char *__wt_path_separator(void); -extern int __wt_os_posix(WT_SESSION_IMPL *session); -extern int __wt_os_posix_cleanup(WT_SESSION_IMPL *session); -extern bool __wt_has_priv(void); -extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); -extern int __wt_os_stdio(WT_SESSION_IMPL *session); -extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); -extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); -extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); -extern void __wt_thread_id(char *buf, size_t buflen); -extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); -extern void 
__wt_yield(void); extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t size, const char *fmt, ...); extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *sizep, const char *fmt, ...); extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t size, const char *fmt, ...); @@ -769,33 +717,55 @@ extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops); extern int __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_recover(WT_SESSION_IMPL *session); -extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); +extern bool __wt_absolute_path(const char *path); +extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp); +extern bool __wt_has_priv(void); +extern const char *__wt_path_separator(void); +extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); +extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); +extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); +extern int __wt_close_connection_close(WT_SESSION_IMPL *session); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); +extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); +extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); +extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); extern int __wt_dlopen(WT_SESSION_IMPL *session, const char 
*path, WT_DLH **dlhp); extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); -extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); -extern int __wt_map_error_rdonly(int winerr); +extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); extern int __wt_errno(void); -extern int __wt_win32_errno(void); -extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); +extern int __wt_get_vm_pagesize(void); extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); -extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void**mappingcookie); -extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); +extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); +extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); +extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); +extern int __wt_map_error_rdonly(int error); +extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); -extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void**mappingcookie); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); -extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); -extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); -extern int __wt_once(void(*init_routine)(void)); -extern int __wt_get_vm_pagesize(void); -extern bool __wt_absolute_path(const char *path); -extern const char *__wt_path_separator(void); -extern bool 
__wt_has_priv(void); -extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); +extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); +extern int __wt_once(void (*init_routine)(void)); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, int dio_type, u_int flags, WT_FH **fhp); +extern int __wt_os_cleanup(WT_SESSION_IMPL *session); +extern int __wt_os_init(WT_SESSION_IMPL *session); +extern int __wt_os_inmemory(WT_SESSION_IMPL *session); +extern int __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session); +extern int __wt_os_posix(WT_SESSION_IMPL *session); +extern int __wt_os_posix_cleanup(WT_SESSION_IMPL *session); +extern int __wt_os_stdio(WT_SESSION_IMPL *session); +extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); +extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); +extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); -extern void __wt_thread_id(char*buf, size_t buflen); -extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); -extern int __wt_os_win(WT_SESSION_IMPL *session); -extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session); +extern 
uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); +extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); +extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session); +extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh); +extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern void __wt_thread_id(char *buf, size_t buflen); extern void __wt_yield(void); -- cgit v1.2.1 From 7f0be084e118b29121ca01eee8f0a3a920df4c92 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 24 Mar 2016 18:27:05 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Function prototypes have unusual characters, force a simple sort. --- dist/s_prototypes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/s_prototypes b/dist/s_prototypes index 8620932e688..f51c22c80ca 100755 --- a/dist/s_prototypes +++ b/dist/s_prototypes @@ -54,7 +54,7 @@ done l=`sed -e '/^[a-z]/!d' -e '/os/!d' filelist` for i in $l; do proto ../$i -done | sort -u +done | env LC_ALL=C sort -u ) > $t f=../src/include/extern.h -- cgit v1.2.1 From 804b3fac4a7f450c703e1604571bc59a304bfb8a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 24 Mar 2016 19:18:06 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Another attempt at getting the Windows prototypes included. --- dist/s_prototypes | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dist/s_prototypes b/dist/s_prototypes index f51c22c80ca..4ceb69f4c77 100755 --- a/dist/s_prototypes +++ b/dist/s_prototypes @@ -47,14 +47,15 @@ EOF # signatures are on multiple lines, that is, #ifdef'd function signatures. Since # the OS directories are the only places with repeated names, and they have no # #ifdef'd signatures, we do it this way. 
-l=`sed -e '/^[a-z]/!d' -e '/os/d' filelist` +l=`sed -e '/^[a-z]/!d' -e '/src\/os/d' filelist` for i in $l; do proto ../$i done -l=`sed -e '/^[a-z]/!d' -e '/os/!d' filelist` +l=`echo ../src\/os*/*.c` + for i in $l; do - proto ../$i -done | env LC_ALL=C sort -u + proto $i +done | tee xxx | env LC_ALL=C sort -u ) > $t f=../src/include/extern.h -- cgit v1.2.1 From 5908a85fbd088086932a95b0af503f469e59b4d2 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 24 Mar 2016 19:18:37 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Fix a few places in the Windows code where we were using a uint32_t to store return values; consistently use "int ret". --- src/include/extern.h | 9 +++++++++ src/os_posix/os_posix.c | 2 +- src/os_win/os_win.c | 37 ++++++++++++++++--------------------- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index 06dbd891cac..64bf3b56808 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -740,11 +740,15 @@ extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); extern int __wt_map_error_rdonly(int error); +extern int __wt_map_error_rdonly(int winerr); extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); +extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void**mappingcookie); extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); +extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void**mappingcookie); extern int 
__wt_once(void (*init_routine)(void)); +extern int __wt_once(void(*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, int dio_type, u_int flags, WT_FH **fhp); extern int __wt_os_cleanup(WT_SESSION_IMPL *session); extern int __wt_os_init(WT_SESSION_IMPL *session); @@ -753,6 +757,8 @@ extern int __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session); extern int __wt_os_posix(WT_SESSION_IMPL *session); extern int __wt_os_posix_cleanup(WT_SESSION_IMPL *session); extern int __wt_os_stdio(WT_SESSION_IMPL *session); +extern int __wt_os_win(WT_SESSION_IMPL *session); +extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session); extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); @@ -761,6 +767,8 @@ extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocate extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); +extern int __wt_win32_errno(void); +extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); @@ -768,4 +776,5 @@ extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session); extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, 
WT_FH *fh); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); extern void __wt_thread_id(char *buf, size_t buflen); +extern void __wt_thread_id(char*buf, size_t buflen); extern void __wt_yield(void); diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index 059ac1cf835..e127df3fdc8 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -164,7 +164,7 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name) WT_SYSCALL_RETRY(remove(name), ret); if (ret != 0) - __wt_err(session, ret, "%s: remove", path); + __wt_err(session, ret, "%s: remove", name); __wt_free(session, path); return (ret); diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 6959aeaf917..036666af4c7 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -53,7 +53,6 @@ __win_file_remove(WT_SESSION_IMPL *session, const char *name) { WT_DECL_RET; char *path; - uint32_t lasterror; #ifdef HAVE_DIAGNOSTIC if (__wt_handle_search(session, name, false, true, NULL, NULL)) @@ -62,16 +61,15 @@ __win_file_remove(WT_SESSION_IMPL *session, const char *name) #endif WT_RET(__wt_filename(session, name, &path)); + name = path; - if ((ret = DeleteFileA(path)) == FALSE) - lasterror = __wt_win32_errno(); + if (DeleteFileA(path) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, "%s: remove", name); + } __wt_free(session, path); - - if (ret != FALSE) - return (0); - - WT_RET_MSG(session, lasterror, "%s: remove", name); + return (ret); } /* @@ -82,7 +80,6 @@ static int __win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) { WT_DECL_RET; - uint32_t lasterror; char *from_path, *to_path; #ifdef HAVE_DIAGNOSTIC @@ -104,23 +101,21 @@ __win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) * Check if file exists since Windows does not override the file if * it exists. 
*/ - if ((ret = GetFileAttributesA(to)) != INVALID_FILE_ATTRIBUTES) { - if ((ret = DeleteFileA(to)) == FALSE) { - lasterror = __wt_win32_errno(); + if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) + if (DeleteFileA(to) == FALSE) { + ret = __wt_win32_errno(); goto err; } - } - if ((MoveFileA(from, to)) == FALSE) - lasterror = __wt_win32_errno(); + if (MoveFileA(from, to) == FALSE) + ret = __wt_win32_errno(); -err: __wt_free(session, from_path); - __wt_free(session, to_path); +err: if (ret != 0) + __wt_err(session, ret, "%s to %s: rename", from, to); - if (ret != FALSE) - return (0); - - WT_RET_MSG(session, lasterror, "MoveFile %s to %s", from, to); + __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); } /* -- cgit v1.2.1 From 5bc7904331edd162cdbe5c1dc314bc38d0b6418e Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 24 Mar 2016 20:01:00 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Consistency pass on error messages, include the stylized name of the function plus the underlying library/system call that failed. Fix the Windows lock function to return failure. 
--- dist/s_string.ok | 9 ++++ src/os_posix/os_dir.c | 4 +- src/os_posix/os_inmemory.c | 5 +- src/os_posix/os_posix.c | 87 +++++++++++++++++-------------- src/os_posix/os_stdio.c | 22 ++++---- src/os_win/os_dir.c | 9 ++-- src/os_win/os_win.c | 127 ++++++++++++++++++++++++--------------------- 7 files changed, 143 insertions(+), 120 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index b07e8dc311d..b6ee2ce50e1 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -67,6 +67,7 @@ CloseHandle Comparator Config Coverity +CreateFileA CreateFileMapping Crummey CustomersPhone @@ -81,6 +82,7 @@ DbCursor DbEnv Decrement Decrypt +DeleteFileA EAGAIN EBUSY EEXIST @@ -248,6 +250,7 @@ Prepend Qsort RCS RDNOLOCK +RDONLY RECNO REF's REFs @@ -260,6 +263,7 @@ RNG RPC RUNDIR Radu +ReadFile Readonly Rebalance RedHat @@ -317,6 +321,7 @@ UTF UltraSparc Unbuffered UnixLib +UnlockFile Unmap UnmapViewOfFile Unmarshall @@ -353,6 +358,7 @@ WiredTigerPreplog WiredTigerTmplog WiredTigerTxn WithSeeds +WriteFile Wuninitialized Wunused XP @@ -596,6 +602,7 @@ fallocate fblocks fclose fcntl +fd fdatasync fdopen ffc @@ -605,6 +612,7 @@ fgetc fgetln fh filefrag +filehandle fileid filename filenames @@ -693,6 +701,7 @@ instantiation intl intnum intpack +intptr intrin inuse io diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c index 49ee1e0f84a..d38cc58d132 100644 --- a/src/os_posix/os_dir.c +++ b/src/os_posix/os_dir.c @@ -47,7 +47,7 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret); if (ret != 0) - WT_ERR_MSG(session, ret, "%s: opendir", path); + WT_ERR_MSG(session, ret, "%s: directory-list: opendir", path); for (dirsz = 0, count = 0; (dp = readdir(dirp)) != NULL;) { /* * Skip . and .. @@ -95,6 +95,6 @@ err: __wt_free(session, entries); } WT_RET_MSG(session, ret, - "directory-list %s, prefix \"%s\"", + "%s: directory-list, prefix \"%s\"", dir, prefix == NULL ? 
"" : prefix); } diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index 825d33becfa..ded0e05d092 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -293,7 +293,7 @@ __im_handle_read( if (ret == 0) return (0); WT_RET_MSG(session, WT_ERROR, - "%s read error: failed to read %" WT_SIZET_FMT " bytes at " + "%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at " "offset %" WT_SIZET_FMT, fh->name, len, off); } @@ -321,7 +321,6 @@ __im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) WT_UNUSED(session); WT_UNUSED(fh); WT_UNUSED(block); - return (0); } @@ -373,7 +372,7 @@ err: __wt_spin_unlock(session, &im->lock); if (ret == 0) return (0); WT_RET_MSG(session, ret, - "%s write error: failed to write %" WT_SIZET_FMT " bytes at " + "%s: handle-write: failed to write %" WT_SIZET_FMT " bytes at " "offset %" WT_SIZET_FMT, fh->name, len, off); } diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index e127df3fdc8..70acbe9bf86 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -13,7 +13,8 @@ * Underlying support function to flush a file handle. 
*/ static int -__posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, bool block) +__posix_sync(WT_SESSION_IMPL *session, + int fd, const char *name, const char *func, bool block) { WT_DECL_RET; @@ -23,7 +24,7 @@ __posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, bool block) (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: sync_file_range", name); + WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func); } #else if (!block) @@ -57,12 +58,12 @@ __posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, bool block) WT_SYSCALL_RETRY(fdatasync(fd), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: fdatasync", name); + WT_RET_MSG(session, ret, "%s: %s: fdatasync", name, func); #else WT_SYSCALL_RETRY(fsync(fd), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: fsync", name); + WT_RET_MSG(session, ret, "%s: %s: fsync", name, func); #endif } @@ -101,13 +102,13 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) WT_SYSCALL_RETRY(( (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); if (ret != 0) - WT_RET_MSG(session, ret, "%s: open", path); + WT_RET_MSG(session, ret, "%s: directory-sync: open", path); - ret = __posix_sync(session, fd, path, true); + ret = __posix_sync(session, fd, path, "directory-sync", true); WT_SYSCALL_RETRY(close(fd), tret); if (tret != 0) - __wt_err(session, tret, "%s: fsync", path); + __wt_err(session, tret, "%s: directory-sync: close", path); return (ret == 0 ? 
tret : ret); #else WT_UNUSED(session); @@ -137,7 +138,7 @@ __posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) *existp = false; ret = 0; } else - __wt_err(session, ret, "%s: stat", name); + __wt_err(session, ret, "%s: file-exist: stat", name); __wt_free(session, path); return (ret); @@ -155,8 +156,8 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name) #ifdef HAVE_DIAGNOSTIC if (__wt_handle_search(session, name, false, true, NULL, NULL)) - WT_RET_MSG( - session, EINVAL, "%s: remove: file has open handles", name); + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); #endif WT_RET(__wt_filename(session, name, &path)); @@ -164,7 +165,7 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name) WT_SYSCALL_RETRY(remove(name), ret); if (ret != 0) - __wt_err(session, ret, "%s: remove", name); + __wt_err(session, ret, "%s: file-remove: remove", name); __wt_free(session, path); return (ret); @@ -182,11 +183,11 @@ __posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) #ifdef HAVE_DIAGNOSTIC if (__wt_handle_search(session, from, false, true, NULL, NULL)) - WT_RET_MSG( - session, EINVAL, "%s: rename: file has open handles", from); + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); if (__wt_handle_search(session, to, false, true, NULL, NULL)) - WT_RET_MSG( - session, EINVAL, "%s: rename: file has open handles", to); + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); #endif from_path = to_path = NULL; @@ -197,7 +198,8 @@ __posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) WT_SYSCALL_RETRY(rename(from, to), ret); if (ret != 0) - __wt_err(session, ret, "%s to %s: rename", from, to); + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); err: __wt_free(session, from_path); __wt_free(session, to_path); @@ -227,7 +229,7 @@ __posix_file_size( if (ret == 0) *sizep = sb.st_size; else if 
(ret != ENOENT || !silent) - __wt_err(session, ret, "%s: stat", name); + __wt_err(session, ret, "%s: file-size: stat", name); __wt_free(session, path); @@ -248,7 +250,7 @@ __posix_handle_advise(WT_SESSION_IMPL *session, WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: posix_fadvise", fh->name); + WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name); #else WT_UNUSED(session); WT_UNUSED(fh); @@ -273,18 +275,18 @@ __posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) WT_SYSCALL_RETRY(close(fh->fd), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: close", fh->name); + WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name); } /* If the handle was opened for writing, flush the file. */ if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { ret = __wt_errno(); - __wt_err(session, ret, "%s: fflush", fh->name); + __wt_err(session, ret, "%s: handle-close: fflush", fh->name); } if ((tret = fclose(fh->fp)) != 0) { tret = __wt_errno(); - __wt_err(session, tret, "%s: fclose", fh->name); + __wt_err(session, tret, "%s: handle-close: fclose", fh->name); } return (ret == 0 ? 
tret : ret); } @@ -298,12 +300,12 @@ __posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) { if (fh->fp == NULL) WT_RET_MSG(session, - ENOTSUP, "%s: getc: no stream configured", fh->name); + ENOTSUP, "%s: handle-getc: no stream configured", fh->name); *chp = fgetc(fh->fp); if (*chp != EOF || !ferror(fh->fp)) return (0); - WT_RET_MSG(session, __wt_errno(), "%s: getc", fh->name); + WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); } /* @@ -333,7 +335,7 @@ __posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: fcntl", fh->name); + WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name); } /* @@ -350,7 +352,7 @@ __posix_handle_printf( if (vfprintf(fh->fp, fmt, ap) >= 0) return (0); - WT_RET_MSG(session, EIO, "%s: vfprintf", fh->name); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); } /* @@ -379,8 +381,8 @@ __posix_handle_read( chunk = WT_MIN(len, WT_GIGABYTE); if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) WT_RET_MSG(session, nr == 0 ? 
WT_ERROR : __wt_errno(), - "%s read error: failed to read %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, + "%s: handle-read: pread: failed to read %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, fh->name, chunk, (uintmax_t)offset); } return (0); @@ -401,7 +403,7 @@ __posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) *sizep = sb.st_size; return (0); } - WT_RET_MSG(session, ret, "%s: fstat", fh->name); + WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name); } /* @@ -412,11 +414,12 @@ static int __posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { if (fh->fp == NULL) - return (__posix_sync(session, fh->fd, fh->name, block)); + return (__posix_sync( + session, fh->fd, fh->name, "handle-sync", block)); if (fflush(fh->fp) == 0) return (0); - WT_RET_MSG(session, __wt_errno(), "%s: fflush", fh->name); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); } /* @@ -431,7 +434,7 @@ __posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: ftruncate", fh->name); + WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name); } /* @@ -460,8 +463,8 @@ __posix_handle_write(WT_SESSION_IMPL *session, chunk = WT_MIN(len, WT_GIGABYTE); if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) WT_RET_MSG(session, __wt_errno(), - "%s write error: failed to write %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, + "%s: handle-write: pwrite: failed to write %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, fh->name, chunk, (uintmax_t)offset); } return (0); @@ -501,7 +504,7 @@ __posix_handle_open(WT_SESSION_IMPL *session, (fd = open(name, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); if (ret == 0) goto setupfh; - WT_ERR_MSG(session, ret, "%s: open", name); + WT_ERR_MSG(session, ret, "%s: handle-open: open", name); } f = LF_ISSET(WT_OPEN_READONLY) ? 
O_RDONLY : O_RDWR; @@ -555,8 +558,9 @@ __posix_handle_open(WT_SESSION_IMPL *session, if (ret != 0) WT_ERR_MSG(session, ret, direct_io ? - "%s: open failed with direct I/O configured, some " - "filesystem types do not support direct I/O" : "%s", name); + "%s: handle-open: open: failed with direct I/O configured, " + "some filesystem types do not support direct I/O" : + "%s: handle-open: open", name); setupfh: #if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) @@ -568,7 +572,8 @@ setupfh: */ if ((f = fcntl(fd, F_GETFD)) == -1 || fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) - WT_ERR_MSG(session, __wt_errno(), "%s: fcntl", name); + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fcntl", name); #endif /* Disable read-ahead on trees: it slows down random read workloads. */ @@ -578,7 +583,8 @@ setupfh: WT_SYSCALL_RETRY( posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); if (ret != 0) - WT_ERR_MSG(session, ret, "%s: posix_fadvise", name); + WT_ERR_MSG(session, ret, + "%s: handle-open: posix_fadvise", name); } #endif @@ -606,7 +612,8 @@ setupfh: break; } if (stream_mode != NULL && (fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), "%s: fdopen", name); + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fdopen", name); __wt_free(session, path); fh->fd = fd; @@ -631,7 +638,7 @@ setupfh: err: if (fd != -1) { WT_SYSCALL_RETRY(close(fd), tret); if (tret != 0) - __wt_err(session, tret, "%s: close", name); + __wt_err(session, tret, "%s: handle-open: close", name); } __wt_free(session, path); fh->fd = -1; diff --git a/src/os_posix/os_stdio.c b/src/os_posix/os_stdio.c index 98242773217..cd6979f1d20 100644 --- a/src/os_posix/os_stdio.c +++ b/src/os_posix/os_stdio.c @@ -19,7 +19,7 @@ __stdio_handle_advise(WT_SESSION_IMPL *session, WT_UNUSED(offset); WT_UNUSED(len); WT_UNUSED(advice); - WT_RET_MSG(session, ENOTSUP, "%s: advise", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-advise", fh->name); } /* @@ -32,7 +32,7 @@ 
__stdio_handle_allocate( { WT_UNUSED(offset); WT_UNUSED(len); - WT_RET_MSG(session, ENOTSUP, "%s: allocate", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); } /* @@ -42,7 +42,7 @@ __stdio_handle_allocate( static int __stdio_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) { - WT_RET_MSG(session, ENOTSUP, "%s: close", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-close", fh->name); } /* @@ -53,7 +53,7 @@ static int __stdio_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) { WT_UNUSED(chp); - WT_RET_MSG(session, ENOTSUP, "%s: getc", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-getc", fh->name); } /* @@ -64,7 +64,7 @@ static int __stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) { WT_UNUSED(lock); - WT_RET_MSG(session, ENOTSUP, "%s: lock", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-lock", fh->name); } /* @@ -77,7 +77,7 @@ __stdio_handle_printf( { if (vfprintf(fh->fp, fmt, ap) >= 0) return (0); - WT_RET_MSG(session, EIO, "%s: vfprintf", fh->name); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); } /* @@ -91,7 +91,7 @@ __stdio_handle_read( WT_UNUSED(offset); WT_UNUSED(len); WT_UNUSED(buf); - WT_RET_MSG(session, ENOTSUP, "%s: read", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-read", fh->name); } /* @@ -102,7 +102,7 @@ static int __stdio_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) { WT_UNUSED(sizep); - WT_RET_MSG(session, ENOTSUP, "%s: size", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-size", fh->name); } /* @@ -116,7 +116,7 @@ __stdio_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) if (fflush(fh->fp) == 0) return (0); - WT_RET_MSG(session, __wt_errno(), "%s: fflush", fh->name); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); } /* @@ -127,7 +127,7 @@ static int __stdio_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) { WT_UNUSED(len); - WT_RET_MSG(session, ENOTSUP, "%s: 
truncate", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-truncate", fh->name); } /* @@ -141,7 +141,7 @@ __stdio_handle_write(WT_SESSION_IMPL *session, WT_UNUSED(offset); WT_UNUSED(len); WT_UNUSED(buf); - WT_RET_MSG(session, ENOTSUP, "%s: write", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-write", fh->name); } /* diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c index 98ddc820850..083fbce97fc 100644 --- a/src/os_win/os_dir.c +++ b/src/os_win/os_dir.c @@ -53,10 +53,9 @@ __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, prefix == NULL ? "all" : prefix)); findhandle = FindFirstFileA(pathbuf->data, &finddata); - - if (INVALID_HANDLE_VALUE == findhandle) - WT_ERR_MSG(session, __wt_win32_errno(), "%s: FindFirstFile", - pathbuf->data); + if (findhandle == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_win32_errno(), + "%s: directory-list: FindFirstFile", pathbuf->data); else { do { /* @@ -112,6 +111,6 @@ err: } WT_RET_MSG(session, ret, - "directory-list %s, prefix \"%s\"", + "%s: directory-list, prefix \"%s\"", dir, prefix == NULL ? 
"" : prefix); } diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 036666af4c7..c1c5c029005 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -56,16 +56,16 @@ __win_file_remove(WT_SESSION_IMPL *session, const char *name) #ifdef HAVE_DIAGNOSTIC if (__wt_handle_search(session, name, false, true, NULL, NULL)) - WT_RET_MSG( - session, EINVAL, "%s: remove: file has open handles", name); + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); #endif WT_RET(__wt_filename(session, name, &path)); name = path; - if (DeleteFileA(path) == FALSE) { + if (DeleteFileA(name) == FALSE) { ret = __wt_win32_errno(); - __wt_err(session, ret, "%s: remove", name); + __wt_err(session, ret, "%s: file-remove: DeleteFileA", name); } __wt_free(session, path); @@ -84,11 +84,11 @@ __win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) #ifdef HAVE_DIAGNOSTIC if (__wt_handle_search(session, from, false, true, NULL, NULL)) - WT_RET_MSG( - session, EINVAL, "%s: rename: file has open handles", from); + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); if (__wt_handle_search(session, to, false, true, NULL, NULL)) - WT_RET_MSG( - session, EINVAL, "%s: rename: file has open handles", to); + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); #endif from_path = to_path = NULL; @@ -104,16 +104,17 @@ __win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) if (DeleteFileA(to) == FALSE) { ret = __wt_win32_errno(); - goto err; + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); } - if (MoveFileA(from, to) == FALSE) + if (ret == 0 && MoveFileA(from, to) == FALSE) { ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + } -err: if (ret != 0) - __wt_err(session, ret, "%s to %s: rename", from, to); - - __wt_free(session, from_path); +err: 
__wt_free(session, from_path); __wt_free(session, to_path); return (ret); } @@ -148,7 +149,8 @@ __win_file_size( */ ret = __wt_win32_errno(); if (!silent) - WT_RET_MSG(session, ret, "%s: GetFileAttributesEx", name); + WT_RET_MSG(session, ret, + "%s: file-size: GetFileAttributesEx", name); return (ret); } @@ -163,7 +165,7 @@ __win_handle_advise(WT_SESSION_IMPL *session, WT_UNUSED(offset); WT_UNUSED(len); WT_UNUSED(advice); - WT_RET_MSG(session, ENOTSUP, "%s: advise", fh->name); + WT_RET_MSG(session, ENOTSUP, "%s: handle-advise", fh->name); } /* @@ -198,6 +200,7 @@ __win_handle_allocate( WT_UNUSED(offset); WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); return (ENOTSUP); } @@ -217,13 +220,15 @@ __win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) if (fh->filehandle != INVALID_HANDLE_VALUE && CloseHandle(fh->filehandle) == 0) { ret = __wt_win32_errno(); - __wt_err(session, ret, "%s: CloseHandle", fh->name); + __wt_err(session, ret, + "%s: handle-close: CloseHandle", fh->name); } if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && CloseHandle(fh->filehandle_secondary) == 0) { ret = __wt_win32_errno(); - __wt_err(session, ret, "%s: CloseHandle: secondary", fh->name); + __wt_err(session, ret, + "%s: handle-close: secondary: CloseHandle", fh->name); } return (ret); } @@ -237,12 +242,12 @@ __win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) { if (fh->fp == NULL) WT_RET_MSG(session, - ENOTSUP, "%s: getc: no stream configured", fh->name); + ENOTSUP, "%s: handle-getc: no stream configured", fh->name); *chp = fgetc(fh->fp); if (*chp != EOF || !ferror(fh->fp)) return (0); - WT_RET_MSG(session, __wt_errno(), "%s: getc", fh->name); + WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); } /* @@ -269,14 +274,19 @@ __win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) * You can lock bytes that are beyond the end of the current file. * This is useful to coordinate adding records to the end of a file. 
*/ - ret = lock ? - LockFile(fh->filehandle, 0, 0, 1, 0) : - UnlockFile(fh->filehandle, 0, 0, 1, 0); - - if (ret == FALSE) - WT_RET_MSG(NULL, __wt_win32_errno(), "%s: LockFile", fh->name); - - return (0); + if (lock) { + if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-lock: LockFile", fh->name); + } + } else + if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-lock: UnlockFile", fh->name); + } + return (ret); } /* @@ -293,7 +303,7 @@ __win_handle_printf( if (vfprintf(fh->fp, fmt, ap) >= 0) return (0); - WT_RET_MSG(session, EIO, "%s: vfprintf", fh->name); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); } /* @@ -328,8 +338,8 @@ __win_handle_read( if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_win32_errno(), - "%s read error: failed to read %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, + "%s: handle-read: ReadFile: failed to read %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, fh->name, chunk, (uintmax_t)offset); } return (0); @@ -343,14 +353,14 @@ static int __win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) { LARGE_INTEGER size; - WT_DECL_RET; - if ((ret = GetFileSizeEx(fh->filehandle, &size)) != 0) { + if (GetFileSizeEx(fh->filehandle, &size) != 0) { *sizep = size.QuadPart; return (0); } - WT_RET_MSG(session, __wt_win32_errno(), "%s: GetFileSizeEx", fh->name); + WT_RET_MSG(session, + __wt_win32_errno(), "%s: handle-size: GetFileSizeEx", fh->name); } /* @@ -367,13 +377,13 @@ __win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) if (fh->fp == NULL) { if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) WT_RET_MSG(session, __wt_win32_errno(), - "%s FlushFileBuffers error", fh->name); + "%s handle-sync: FlushFileBuffers error", fh->name); return (0); } if (fflush(fh->fp) == 0) return (0); - WT_RET_MSG(session, 
__wt_errno(), "%s: fflush", fh->name); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); } /* @@ -388,21 +398,18 @@ __win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) largeint.QuadPart = len; - if ((ret = SetFilePointerEx( - fh->filehandle_secondary, largeint, NULL, FILE_BEGIN)) == FALSE) - WT_RET_MSG(session, - __wt_win32_errno(), "%s SetFilePointerEx error", - fh->name); - - ret = SetEndOfFile(fh->filehandle_secondary); - if (ret != FALSE) - return (0); - - if (GetLastError() == ERROR_USER_MAPPED_FILE) - return (EBUSY); + if (SetFilePointerEx( + fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) + WT_RET_MSG(session, __wt_win32_errno(), + "%s: handle-truncate: SetFilePointerEx", fh->name); - WT_RET_MSG(session, - __wt_win32_errno(), "%s SetEndOfFile error", fh->name); + if (SetEndOfFile(fh->filehandle_secondary) == FALSE) { + if (GetLastError() == ERROR_USER_MAPPED_FILE) + return (EBUSY); + WT_RET_MSG(session, __wt_win32_errno(), + "%s: handle-truncate: SetEndOfFile error", fh->name); + } + return (0); } /* @@ -437,8 +444,8 @@ __win_handle_write(WT_SESSION_IMPL *session, if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) WT_RET_MSG(session, __wt_win32_errno(), - "%s write error: failed to write %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, + "%s: handle-write: WriteFile: failed to write %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, fh->name, chunk, (uintmax_t)offset); } return (0); @@ -555,9 +562,10 @@ __win_handle_open(WT_SESSION_IMPL *session, if (filehandle == INVALID_HANDLE_VALUE) WT_ERR_MSG(session, __wt_win32_errno(), direct_io ? 
- "%s: open failed with direct I/O configured, some " - "filesystem types do not support direct I/O" : - "%s", name); + "%s: handle-open: CreateFileA: failed with direct " + "I/O configured, some filesystem types do not " + "support direct I/O" : + "%s: handle-open: CreateFileA", name); } /* @@ -574,7 +582,7 @@ __win_handle_open(WT_SESSION_IMPL *session, NULL); if (filehandle_secondary == INVALID_HANDLE_VALUE) WT_ERR_MSG(session, __wt_win32_errno(), - "open failed for secondary handle: %s", name); + "%s: handle-open: CreateFileA: secondary", name); /* Optionally configure the stream API. */ switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { @@ -599,10 +607,11 @@ __win_handle_open(WT_SESSION_IMPL *session, } if (stream_mode != NULL) { if ((fd = _open_osfhandle((intptr_t)filehandle, f)) == -1) - WT_ERR_MSG( - session, __wt_errno(), "%s: _open_osfhandle", name); + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: _open_osfhandle", name); if ((fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), "%s: fdopen", name); + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fdopen", name); } /* Configure fallocate/posix_fallocate calls. */ -- cgit v1.2.1 From c707de028359231fe691b6ad3efecaf87c2404d1 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 24 Mar 2016 20:13:10 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files For sort to be able to remove duplicate function signatures from the prototypes, the prototypes have to be identical. --- src/os_win/os_errno.c | 8 ++++---- src/os_win/os_map.c | 8 ++++---- src/os_win/os_once.c | 2 +- src/os_win/os_thread.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/os_win/os_errno.c b/src/os_win/os_errno.c index 1968dadd856..269be2abee3 100644 --- a/src/os_win/os_errno.c +++ b/src/os_win/os_errno.c @@ -46,13 +46,13 @@ __wt_map_windows_error_to_error(DWORD winerr) * of failures. 
*/ int -__wt_map_error_rdonly(int winerr) +__wt_map_error_rdonly(int error) { - if (winerr == ERROR_FILE_NOT_FOUND) + if (error == ERROR_FILE_NOT_FOUND) return (WT_NOTFOUND); - else if (winerr == ERROR_ACCESS_DENIED) + else if (error == ERROR_ACCESS_DENIED) return (WT_PERM_DENIED); - return (winerr); + return (error); } /* diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index 96146705fe4..c41e0f83ae7 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -13,8 +13,8 @@ * Map a file into memory. */ int -__wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, - void** mappingcookie) +__wt_mmap(WT_SESSION_IMPL *session, + WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) { void *map; size_t orig_size; @@ -85,8 +85,8 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size) * Remove a memory mapping. */ int -__wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, - void** mappingcookie) +__wt_munmap(WT_SESSION_IMPL *session, + WT_FH *fh, void *map, size_t len, void **mappingcookie) { WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: UnmapViewOfFile %p: %" WT_SIZET_FMT " bytes", diff --git a/src/os_win/os_once.c b/src/os_win/os_once.c index 9ea3fe044eb..347d1883cca 100644 --- a/src/os_win/os_once.c +++ b/src/os_win/os_once.c @@ -32,7 +32,7 @@ BOOL CALLBACK _wt_init_once_callback( * One-time initialization per process. */ int -__wt_once(void(*init_routine)(void)) +__wt_once(void (*init_routine)(void)) { INIT_ONCE once_control = INIT_ONCE_STATIC_INIT; PVOID lpContext = NULL; diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c index e2394731df1..1a223a19cf8 100644 --- a/src/os_win/os_thread.c +++ b/src/os_win/os_thread.c @@ -54,7 +54,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * Fill in a printable version of the process and thread IDs. 
*/ void -__wt_thread_id(char* buf, size_t buflen) +__wt_thread_id(char *buf, size_t buflen) { (void)snprintf(buf, buflen, "%" PRIu64 ":%" PRIu64, -- cgit v1.2.1 From b09501ef3ca4881e40edbfaebe034c9b8442f016 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 24 Mar 2016 20:20:04 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Rebuild the prototypes. --- src/include/extern.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index 64bf3b56808..a1f70c4340b 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -740,15 +740,11 @@ extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); extern int __wt_map_error_rdonly(int error); -extern int __wt_map_error_rdonly(int winerr); extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); -extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void**mappingcookie); extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); -extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void**mappingcookie); extern int __wt_once(void (*init_routine)(void)); -extern int __wt_once(void(*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, int dio_type, u_int flags, WT_FH **fhp); extern int __wt_os_cleanup(WT_SESSION_IMPL *session); extern int __wt_os_init(WT_SESSION_IMPL *session); @@ -776,5 +772,4 @@ extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session); extern void 
__wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); extern void __wt_thread_id(char *buf, size_t buflen); -extern void __wt_thread_id(char*buf, size_t buflen); extern void __wt_yield(void); -- cgit v1.2.1 From f4bf1e6be331d6f79d2149390d947dae8682ea29 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 06:52:03 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Whitespace. --- src/conn/conn_log.c | 3 +-- src/log/log.c | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index a3af121036d..6cb8ba3d0f9 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -260,8 +260,7 @@ __log_prealloc_once(WT_SESSION_IMPL *session) * files that may not have been used yet. */ WT_ERR(__wt_dirlist(session, conn->log_path, - WT_LOG_PREPNAME, WT_DIRLIST_INCLUDE, - &recfiles, &reccount)); + WT_LOG_PREPNAME, WT_DIRLIST_INCLUDE, &recfiles, &reccount)); __wt_log_files_free(session, recfiles, reccount); recfiles = NULL; /* diff --git a/src/log/log.c b/src/log/log.c index 44e725adeda..ab8e2a4f2de 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -724,8 +724,7 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num) * If there are no pre-allocated files, return WT_NOTFOUND. 
*/ logfiles = NULL; - WT_ERR(__log_get_files(session, - WT_LOG_PREPNAME, &logfiles, &logcount)); + WT_ERR(__log_get_files(session, WT_LOG_PREPNAME, &logfiles, &logcount)); if (logcount == 0) return (WT_NOTFOUND); @@ -955,8 +954,7 @@ __log_truncate(WT_SESSION_IMPL *session, */ if (this_log) goto err; - WT_ERR(__log_get_files(session, - WT_LOG_FILENAME, &logfiles, &logcount)); + WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount)); for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum)); if (lognum > lsn->l.file && -- cgit v1.2.1 From 0dfb67c62f0278a2420daaa4fe31d88250e79a22 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 08:23:34 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Add a new verbose option, "handleops", add verbose file and handle operation messages back into the top-level functions. --- dist/api_data.py | 1 + dist/flags.py | 1 + dist/s_string.ok | 1 + src/config/config_def.c | 30 +++--- src/conn/conn_api.c | 1 + src/include/extern.h | 2 +- src/include/flags.h | 37 ++++---- src/include/misc.i | 224 +++++++++++++++++++++++++++----------------- src/include/wiredtiger.in | 12 +-- src/os_posix/os_dir.c | 9 +- src/os_posix/os_fallocate.c | 6 -- src/os_posix/os_open.c | 82 +++++++++++++++- src/os_win/os_dir.c | 7 -- 13 files changed, 261 insertions(+), 152 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index 02aee1e8825..5ca294a5d60 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -569,6 +569,7 @@ connection_runtime_config = [ 'evict', 'evictserver', 'fileops', + 'handleops', 'log', 'lsm', 'lsm_manager', diff --git a/dist/flags.py b/dist/flags.py index 3e9d8cd890c..8f7827ad160 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -65,6 +65,7 @@ flags = { 'VERB_EVICT', 'VERB_EVICTSERVER', 'VERB_FILEOPS', + 'VERB_HANDLEOPS', 'VERB_LOG', 'VERB_LSM', 'VERB_LSM_MANAGER', diff --git a/dist/s_string.ok b/dist/s_string.ok index 
127ae7fd2eb..6f9be4e6141 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -661,6 +661,7 @@ gostring gostruct goutf gt +handleops hashval havesize hdr diff --git a/src/config/config_def.c b/src/config/config_def.c index c752e5eb265..5b6f0bac323 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -150,9 +150,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { confchk_wiredtiger_open_statistics_log_subconfigs, 6 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, @@ -680,9 +680,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, @@ -761,9 +761,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - 
"\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, @@ -837,9 +837,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, @@ -913,9 +913,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 
f3db3931cfc..9ac77d3540e 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1694,6 +1694,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "evict", WT_VERB_EVICT }, { "evictserver", WT_VERB_EVICTSERVER }, { "fileops", WT_VERB_FILEOPS }, + { "handleops", WT_VERB_HANDLEOPS }, { "log", WT_VERB_LOG }, { "lsm", WT_VERB_LSM }, { "lsm_manager", WT_VERB_LSM_MANAGER }, diff --git a/src/include/extern.h b/src/include/extern.h index a1f70c4340b..7a56d7ef242 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -745,7 +745,7 @@ extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); extern int __wt_once(void (*init_routine)(void)); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, int dio_type, u_int flags, WT_FH **fhp); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, int dio_type, uint32_t flags, WT_FH **fhp); extern int __wt_os_cleanup(WT_SESSION_IMPL *session); extern int __wt_os_init(WT_SESSION_IMPL *session); extern int __wt_os_inmemory(WT_SESSION_IMPL *session); diff --git a/src/include/flags.h b/src/include/flags.h index ac2d1f20ce5..3d9b0ed716b 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -83,24 +83,25 @@ #define WT_VERB_EVICT 0x00000010 #define WT_VERB_EVICTSERVER 0x00000020 #define WT_VERB_FILEOPS 0x00000040 -#define WT_VERB_LOG 0x00000080 -#define WT_VERB_LSM 0x00000100 -#define WT_VERB_LSM_MANAGER 0x00000200 -#define WT_VERB_METADATA 0x00000400 -#define WT_VERB_MUTEX 0x00000800 -#define WT_VERB_OVERFLOW 0x00001000 -#define WT_VERB_READ 0x00002000 -#define WT_VERB_REBALANCE 0x00004000 -#define WT_VERB_RECONCILE 0x00008000 -#define WT_VERB_RECOVERY 0x00010000 -#define WT_VERB_SALVAGE 0x00020000 -#define WT_VERB_SHARED_CACHE 0x00040000 -#define WT_VERB_SPLIT 
0x00080000 -#define WT_VERB_TEMPORARY 0x00100000 -#define WT_VERB_TRANSACTION 0x00200000 -#define WT_VERB_VERIFY 0x00400000 -#define WT_VERB_VERSION 0x00800000 -#define WT_VERB_WRITE 0x01000000 +#define WT_VERB_HANDLEOPS 0x00000080 +#define WT_VERB_LOG 0x00000100 +#define WT_VERB_LSM 0x00000200 +#define WT_VERB_LSM_MANAGER 0x00000400 +#define WT_VERB_METADATA 0x00000800 +#define WT_VERB_MUTEX 0x00001000 +#define WT_VERB_OVERFLOW 0x00002000 +#define WT_VERB_READ 0x00004000 +#define WT_VERB_REBALANCE 0x00008000 +#define WT_VERB_RECONCILE 0x00010000 +#define WT_VERB_RECOVERY 0x00020000 +#define WT_VERB_SALVAGE 0x00040000 +#define WT_VERB_SHARED_CACHE 0x00080000 +#define WT_VERB_SPLIT 0x00100000 +#define WT_VERB_TEMPORARY 0x00200000 +#define WT_VERB_TRANSACTION 0x00400000 +#define WT_VERB_VERIFY 0x00800000 +#define WT_VERB_VERSION 0x01000000 +#define WT_VERB_WRITE 0x02000000 #define WT_VISIBILITY_ERR 0x00000010 /* * flags section: END diff --git a/src/include/misc.i b/src/include/misc.i index 755a22df977..8ac0451b6ef 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -79,6 +79,11 @@ static inline int __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) { + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, + "%s: directory-list: %s prefix %s", + dir, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude", + prefix == NULL ? "all" : prefix)); + return (S2C(session)->file_directory_list( session, dir, prefix, flags, dirlist, countp)); } @@ -88,55 +93,88 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, * Flush a directory to ensure file creation is durable. 
*/ static inline int -__wt_directory_sync(WT_SESSION_IMPL *session, const char *path) +__wt_directory_sync(WT_SESSION_IMPL *session, const char *name) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - return (S2C(session)->file_directory_sync(session, path)); + WT_RET(__wt_verbose( + session, WT_VERB_FILEOPS, "%s: directory-sync", name)); + + return (S2C(session)->file_directory_sync(session, name)); } /* - * __wt_directory_sync_fh -- - * Flush a directory file handle to ensure file creation is durable. - * - * We don't use the normal sync path because many file systems don't require - * this step and we don't want to penalize them. + * __wt_exist -- + * Return if the file exists. */ static inline int -__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) +__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name)); + + return (S2C(session)->file_exist(session, name, existp)); +} + +/* + * __wt_remove -- + * POSIX remove. + */ +static inline int +__wt_remove(WT_SESSION_IMPL *session, const char *name) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); -#ifdef __linux__ - return (fh->fh_sync(session, fh, true)); -#else - WT_UNUSED(fh); - return (0); -#endif + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name)); + + return (S2C(session)->file_remove(session, name)); } /* - * __wt_exist -- - * Return if the file exists. + * __wt_rename -- + * POSIX rename. 
*/ static inline int -__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) { - return (S2C(session)->file_exist(session, name, existp)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose( + session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to)); + + return (S2C(session)->file_rename(session, from, to)); } /* - * __wt_fallocate -- - * Extend a file. + * __wt_filesize_name -- + * Get the size of a file in bytes, by file name. */ static inline int -__wt_fallocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +__wt_filesize_name( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name)); + + return (S2C(session)->file_size(session, name, silent, sizep)); +} + +/* + * __wt_directory_sync_fh -- + * Flush a directory file handle to ensure file creation is durable. + * + * We don't use the normal sync path because many file systems don't require + * this step and we don't want to penalize them. + */ +static inline int +__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - return (fh->fh_allocate(session, fh, offset, len)); +#ifdef __linux__ + return (fh->fh_sync(session, fh, true)); +#else + WT_UNUSED(fh); + return (0); +#endif } /* @@ -148,6 +186,9 @@ __wt_posix_fadvise(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) { #if defined(HAVE_POSIX_FADVISE) + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-advise", fh->name)); + return (fh->fh_advise(session, fh, offset, len, advice)); #else WT_UNUSED(session); @@ -160,58 +201,65 @@ __wt_posix_fadvise(WT_SESSION_IMPL *session, } /* - * __wt_file_lock -- - * Lock/unlock a file. 
+ * __wt_fallocate -- + * Extend a file. */ static inline int -__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) +__wt_fallocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) { - return (fh->fh_lock(session, fh, lock)); -} + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); -/* - * __wt_filesize -- - * Get the size of a file in bytes, by file handle. - */ -static inline int -__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - return (fh->fh_size(session, fh, sizep)); + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-allocate: %" PRIuMAX " at %" PRIuMAX, + fh->name, (uintmax_t)len, (uintmax_t)offset)); + + return (fh->fh_allocate(session, fh, offset, len)); } /* - * __wt_filesize_name -- - * Get the size of a file in bytes, by file name. + * __wt_file_lock -- + * Lock/unlock a file. */ static inline int -__wt_filesize_name( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) { - return (S2C(session)->file_size(session, name, silent, sizep)); + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-lock: %s", fh->name, lock ? "lock" : "unlock")); + + return (fh->fh_lock(session, fh, lock)); } /* - * __wt_fsync -- - * POSIX fflush/fsync. + * __wt_vfprintf -- + * ANSI C vfprintf. */ static inline int -__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) { - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-printf", fh->name)); - return (fh->fh_sync(session, fh, block)); + return (fh->fh_printf(session, fh, fmt, ap)); } /* - * __wt_ftruncate -- - * POSIX ftruncate. + * __wt_fprintf -- + * ANSI C fprintf. 
*/ static inline int -__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) { - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_DECL_RET; + va_list ap; - return (fh->fh_truncate(session, fh, len)); + va_start(ap, fmt); + ret = __wt_vfprintf(session, fh, fmt, ap); + va_end(ap); + + return (ret); } /* @@ -222,33 +270,57 @@ static inline int __wt_read( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) { + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX, + fh->name, len, (uintmax_t)offset)); + WT_STAT_FAST_CONN_INCR(session, read_io); return (fh->fh_read(session, fh, offset, len, buf)); } /* - * __wt_remove -- - * POSIX remove. + * __wt_filesize -- + * Get the size of a file in bytes, by file handle. */ static inline int -__wt_remove(WT_SESSION_IMPL *session, const char *name) +__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->name)); + + return (fh->fh_size(session, fh, sizep)); +} + +/* + * __wt_fsync -- + * POSIX fflush/fsync. + */ +static inline int +__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - return (S2C(session)->file_remove(session, name)); + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->name)); + + return (fh->fh_sync(session, fh, block)); } /* - * __wt_rename -- - * POSIX rename. + * __wt_ftruncate -- + * POSIX ftruncate. 
*/ static inline int -__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - return (S2C(session)->file_rename(session, from, to)); + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-truncate: %" PRIuMAX, + fh->name, (uintmax_t)len)); + + return (fh->fh_truncate(session, fh, len)); } /* @@ -263,35 +335,11 @@ __wt_write(WT_SESSION_IMPL *session, WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX, + fh->name, len, (uintmax_t)offset)); + WT_STAT_FAST_CONN_INCR(session, write_io); return (fh->fh_write(session, fh, offset, len, buf)); } - -/* - * __wt_vfprintf -- - * ANSI C vfprintf. - */ -static inline int -__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - return (fh->fh_printf(session, fh, fmt, ap)); -} - -/* - * __wt_fprintf -- - * ANSI C fprintf. - */ -static inline int -__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...) 
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) -{ - WT_DECL_RET; - va_list ap; - - va_start(ap, fmt); - ret = __wt_vfprintf(session, fh, fmt, ap); - va_end(ap); - - return (ret); -} diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 1e263f22880..279858a808e 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1922,8 +1922,8 @@ struct __wt_connection { * as a list\, such as "verbose=[evictserver\,read]"., a * list\, with values chosen from the following options: \c "api"\, \c * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c - * "evictserver"\, \c "fileops"\, \c "log"\, \c "lsm"\, \c - * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c + * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\, + * \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default @@ -2417,10 +2417,10 @@ struct __wt_connection { * list\, such as "verbose=[evictserver\,read]"., a list\, with * values chosen from the following options: \c "api"\, \c "block"\, \c * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, - * \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c - * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c - * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c - * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} + * \c "handleops"\, \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c + * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c + * "recovery"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, + * \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} * @config{write_through, Use \c 
FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. Options are given as a list\, such * as "write_through=[data]". Configuring \c write_through requires diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c index d38cc58d132..0fc27b947f1 100644 --- a/src/os_posix/os_dir.c +++ b/src/os_posix/os_dir.c @@ -37,13 +37,6 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, dirallocsz = 0; dirsz = 0; entries = NULL; - if (flags == 0) - LF_SET(WT_DIRLIST_INCLUDE); - - WT_ERR(__wt_verbose(session, WT_VERB_FILEOPS, - "wt_dirlist of %s %s prefix %s", - path, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude", - prefix == NULL ? "all" : prefix)); WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret); if (ret != 0) @@ -55,9 +48,9 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) continue; - match = false; /* The list of files is optionally filtered by a prefix. */ + match = false; if (prefix != NULL && ((LF_ISSET(WT_DIRLIST_INCLUDE) && WT_PREFIX_MATCH(dp->d_name, prefix)) || diff --git a/src/os_posix/os_fallocate.c b/src/os_posix/os_fallocate.c index df4d76c3b3a..22879d36182 100644 --- a/src/os_posix/os_fallocate.c +++ b/src/os_posix/os_fallocate.c @@ -120,20 +120,14 @@ __wt_posix_handle_allocate( * Check for already configured handles and make the configured call. 
*/ case WT_FALLOCATE_POSIX: - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: posix_fallocate", fh->name)); if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: posix_fallocate", fh->name); case WT_FALLOCATE_STD: - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: fallocate", fh->name)); if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: fallocate", fh->name); case WT_FALLOCATE_SYS: - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: sys_fallocate", fh->name)); if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: sys_fallocate", fh->name); diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index b98532afc02..69390039353 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -78,13 +78,87 @@ __wt_handle_search_unlock(WT_SESSION_IMPL *session) __wt_spin_unlock(session, &S2C(session)->fh_lock); } +/* + * __open_verbose -- + * Optionally output a verbose message on handle open. + */ +static inline int +__open_verbose( + WT_SESSION_IMPL *session, const char *name, int dio_type, uint32_t flags) +{ +#ifdef HAVE_VERBOSE + if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS)) + return (0); + + /* + * It's useful to track file opens when debugging platforms, take some + * effort to output good tracking information. 
+ */ + WT_DECL_RET; + WT_DECL_ITEM(tmp); + const char *dio_type_tag, *sep; + + switch (dio_type) { + case WT_FILE_TYPE_CHECKPOINT: + dio_type_tag = "checkpoint"; + break; + case WT_FILE_TYPE_DATA: + dio_type_tag = "data"; + break; + case WT_FILE_TYPE_DIRECTORY: + dio_type_tag = "directory"; + break; + case WT_FILE_TYPE_LOG: + dio_type_tag = "log"; + break; + case WT_FILE_TYPE_REGULAR: + dio_type_tag = "regular"; + break; + default: + dio_type_tag = "unknown open type"; + break; + } + + sep = ""; + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + +#define WT_OPEN_VERBOSE_FLAG(f, name) \ + if (LF_ISSET(f)) { \ + WT_ERR(__wt_buf_catfmt( \ + session, tmp, "%s%s", sep, name)); \ + sep = ","; \ + } + + WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create"); + WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive"); + WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed"); + WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly"); + WT_OPEN_VERBOSE_FLAG(WT_STREAM_APPEND, "stream-append"); + WT_OPEN_VERBOSE_FLAG(WT_STREAM_READ, "stream-read"); + WT_OPEN_VERBOSE_FLAG(WT_STREAM_WRITE, "stream-write"); + + ret = __wt_verbose(session, WT_VERB_FILEOPS, + "%s: handle-open: type %s, flags %s", + name, dio_type_tag, (char *)tmp->data); + +err: __wt_scr_free(session, &tmp); + return (ret); +#else + WT_UNUSED(session); + WT_UNUSED(name); + WT_UNUSED(dio_type); + WT_UNUSED(flags); + return (0); +#endif +} + /* * __wt_open -- * Open a file handle. */ int __wt_open(WT_SESSION_IMPL *session, - const char *name, int dio_type, u_int flags, WT_FH **fhp) + const char *name, int dio_type, uint32_t flags, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -96,7 +170,7 @@ __wt_open(WT_SESSION_IMPL *session, fh = NULL; open_called = false; - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name)); + WT_RET(__open_verbose(session, name, dio_type, flags)); /* Check if the handle is already open. 
*/ if (__wt_handle_search(session, name, true, true, NULL, &fh)) { @@ -175,7 +249,9 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) fh = *fhp; *fhp = NULL; - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: close", fh->name)); + /* Track handle-close as a file operation, so open and close match. */ + WT_RET(__wt_verbose( + session, WT_VERB_FILEOPS, "%s: handle-close", fh->name)); /* * If the reference count hasn't gone to 0, or if it's an in-memory diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c index 083fbce97fc..4b538ca418d 100644 --- a/src/os_win/os_dir.c +++ b/src/os_win/os_dir.c @@ -44,13 +44,6 @@ __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, dirallocsz = 0; dirsz = 0; entries = NULL; - if (flags == 0) - LF_SET(WT_DIRLIST_INCLUDE); - - WT_ERR(__wt_verbose(session, WT_VERB_FILEOPS, - "wt_dirlist of %s %s prefix %s", - pathbuf->data, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude", - prefix == NULL ? "all" : prefix)); findhandle = FindFirstFileA(pathbuf->data, &finddata); if (findhandle == INVALID_HANDLE_VALUE) -- cgit v1.2.1 From 211b977285558e38da7a68a90028acddf940ca61 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 08:29:58 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Restructure the Windows directory-list code to look the same as the POSIX version. 
--- src/include/misc.i | 2 ++ src/os_posix/os_dir.c | 9 +++---- src/os_win/os_dir.c | 70 +++++++++++++++++++++++---------------------------- 3 files changed, 37 insertions(+), 44 deletions(-) diff --git a/src/include/misc.i b/src/include/misc.i index 8ac0451b6ef..373a74272d0 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -79,6 +79,8 @@ static inline int __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) { + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: directory-list: %s prefix %s", dir, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude", diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c index 0fc27b947f1..78ae5f8edd4 100644 --- a/src/os_posix/os_dir.c +++ b/src/os_posix/os_dir.c @@ -26,8 +26,6 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, bool match; char **entries, *path; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - *dirlist = NULL; *countp = 0; @@ -41,7 +39,8 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, "%s: directory-list: opendir", path); - for (dirsz = 0, count = 0; (dp = readdir(dirp)) != NULL;) { + + for (count = 0; (dp = readdir(dirp)) != NULL;) { /* * Skip . and .. 
*/ @@ -74,8 +73,8 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, if (count > 0) *dirlist = entries; *countp = count; -err: - if (dirp != NULL) + +err: if (dirp != NULL) (void)closedir(dirp); __wt_free(session, path); diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c index 4b538ca418d..95a7f66827b 100644 --- a/src/os_win/os_dir.c +++ b/src/os_win/os_dir.c @@ -28,19 +28,15 @@ __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, *dirlist = NULL; *countp = 0; - findhandle = INVALID_HANDLE_VALUE; - count = 0; - WT_RET(__wt_filename(session, dir, &path)); pathlen = strlen(path); - if (path[pathlen - 1] == '\\') { + if (path[pathlen - 1] == '\\') path[pathlen - 1] = '\0'; - } - WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf)); WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", path)); + findhandle = INVALID_HANDLE_VALUE; dirallocsz = 0; dirsz = 0; entries = NULL; @@ -49,47 +45,43 @@ __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, if (findhandle == INVALID_HANDLE_VALUE) WT_ERR_MSG(session, __wt_win32_errno(), "%s: directory-list: FindFirstFile", pathbuf->data); - else { - do { - /* - * Skip . and .. - */ - if (strcmp(finddata.cFileName, ".") == 0 || - strcmp(finddata.cFileName, "..") == 0) - continue; - match = false; + count = 0; + do { + /* + * Skip . and .. + */ + if (strcmp(finddata.cFileName, ".") == 0 || + strcmp(finddata.cFileName, "..") == 0) + continue; + + /* The list of files is optionally filtered by a prefix. */ + match = false; + if (prefix != NULL && + ((LF_ISSET(WT_DIRLIST_INCLUDE) && + WT_PREFIX_MATCH(finddata.cFileName, prefix)) || + (LF_ISSET(WT_DIRLIST_EXCLUDE) && + !WT_PREFIX_MATCH(finddata.cFileName, prefix)))) + match = true; + if (prefix == NULL || match) { /* - * The list of files is optionally filtered by a prefix. + * We have a file name we want to return. 
*/ - if (prefix != NULL && - ((LF_ISSET(WT_DIRLIST_INCLUDE) && - WT_PREFIX_MATCH(finddata.cFileName, prefix)) || - (LF_ISSET(WT_DIRLIST_EXCLUDE) && - !WT_PREFIX_MATCH(finddata.cFileName, prefix)))) - match = true; - if (prefix == NULL || match) { - /* - * We have a file name we want to return. - */ - count++; - if (count > dirsz) { - dirsz += WT_DIR_ENTRY; - WT_ERR(__wt_realloc_def(session, - &dirallocsz, dirsz, &entries)); - } - WT_ERR(__wt_strdup(session, - finddata.cFileName, &entries[count - 1])); + count++; + if (count > dirsz) { + dirsz += WT_DIR_ENTRY; + WT_ERR(__wt_realloc_def(session, + &dirallocsz, dirsz, &entries)); } - } while (FindNextFileA(findhandle, &finddata) != 0); - } - + WT_ERR(__wt_strdup(session, + finddata.cFileName, &entries[count - 1])); + } + } while (FindNextFileA(findhandle, &finddata) != 0); if (count > 0) *dirlist = entries; *countp = count; -err: - if (findhandle != INVALID_HANDLE_VALUE) +err: if (findhandle != INVALID_HANDLE_VALUE) (void)FindClose(findhandle); __wt_free(session, path); __wt_scr_free(session, &pathbuf); -- cgit v1.2.1 From 39a91bdc4c7a9be21df6bbd2be7b362343d19dd2 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 08:56:21 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files KNF --- src/os_win/os_win.c | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index c1c5c029005..cffaa7579c7 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -472,13 +472,16 @@ __win_handle_open(WT_SESSION_IMPL *session, direct_io = false; path = NULL; - filehandle = filehandle_secondary = INVALID_HANDLE_VALUE; + /* Set up error handling. 
*/ + fh->filehandle = fh->filehandle_secondary = + filehandle = filehandle_secondary = INVALID_HANDLE_VALUE; + fh->fp = NULL; /* * Opening a file handle on a directory is only to support filesystems * that require a directory sync for durability, and Windows doesn't - * require that, functionality: create empty file handles with invalid - * handles. + * require that functionality: create an empty WT_FH structure with + * invalid handles. */ if (dio_type == WT_FILE_TYPE_DIRECTORY) goto directory_open; @@ -541,24 +544,14 @@ __win_handle_open(WT_SESSION_IMPL *session, dio_type == WT_FILE_TYPE_CHECKPOINT) fh->extend_len = conn->data_extend_len; - filehandle = CreateFileA(name, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - dwCreationDisposition, - f, - NULL); + filehandle = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, + share_mode, NULL, dwCreationDisposition, f, NULL); if (filehandle == INVALID_HANDLE_VALUE) { if (LF_ISSET(WT_OPEN_CREATE) && GetLastError() == ERROR_FILE_EXISTS) - filehandle = CreateFileA(name, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - OPEN_EXISTING, - f, - NULL); - + filehandle = CreateFileA( + name, GENERIC_READ | GENERIC_WRITE, share_mode, + NULL, OPEN_EXISTING, f, NULL); if (filehandle == INVALID_HANDLE_VALUE) WT_ERR_MSG(session, __wt_win32_errno(), direct_io ? @@ -573,18 +566,13 @@ __win_handle_open(WT_SESSION_IMPL *session, * concurrently with reads on the file. Writes would also move the file * pointer. */ - filehandle_secondary = CreateFileA(name, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - OPEN_EXISTING, - f, - NULL); + filehandle_secondary = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, + share_mode, NULL, OPEN_EXISTING, f, NULL); if (filehandle_secondary == INVALID_HANDLE_VALUE) WT_ERR_MSG(session, __wt_win32_errno(), "%s: handle-open: CreateFileA: secondary", name); - /* Optionally configure the stream API. */ + /* Optionally configure a stdio stream API. 
*/ switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { case WT_STREAM_APPEND: f = _O_APPEND | _O_TEXT; -- cgit v1.2.1 From 577156a39007d72cfd56923876f76849974c9ba4 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 08:56:54 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files On systems with the O_CLOEXEC open flag, specify that flag when opening a directory. On systems without the O_CLOEXEC flag, use fcntl to prevent children from accessing directory file descriptors. I can't think of a security problem, but I'm not interested in taking the risk, either. --- dist/s_string.ok | 1 + src/os_posix/os_posix.c | 73 +++++++++++++++++++++++++++++++++---------------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index 6f9be4e6141..17b5d54c7a1 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -462,6 +462,7 @@ ckpt ckptfrag ckptlist cksum +cloexec clsm cmd cmp diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index 70acbe9bf86..f996aa4a3d1 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -470,6 +470,35 @@ __posix_handle_write(WT_SESSION_IMPL *session, return (0); } +/* + * __posix_handle_open_cloexec -- + * Prevent child access to file handles. + */ +static inline int +__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) +{ +#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) + int f; + + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. There's an obvious race + * between the open and this call, prefer the flag to open if available. 
+ */ + if ((f = fcntl(fd, F_GETFD)) == -1 || + fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) + WT_RET_MSG(session, __wt_errno(), + "%s: handle-open: fcntl", name); + return (0); +#else + WT_UNUSED(session); + WT_UNUSED(fd); + WT_UNUSED(name); + return (0); +#endif +} + /* * __posix_handle_open -- * Open a file handle. @@ -490,8 +519,9 @@ __posix_handle_open(WT_SESSION_IMPL *session, direct_io = false; path = NULL; - /* 0 is a legal file descriptor, set up error handling. */ + /* Set up error handling. */ fh->fd = fd = -1; + fh->fp = NULL; /* Create the path to the file. */ if (!LF_ISSET(WT_OPEN_FIXED)) { @@ -500,11 +530,21 @@ __posix_handle_open(WT_SESSION_IMPL *session, } if (dio_type == WT_FILE_TYPE_DIRECTORY) { + f = O_RDONLY; +#ifdef O_CLOEXEC + /* + * Security: + * The application may spawn a new process, and we don't want + * another process to have access to our file handles. + */ + f |= O_CLOEXEC; +#endif WT_SYSCALL_RETRY(( - (fd = open(name, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); - if (ret == 0) - goto setupfh; - WT_ERR_MSG(session, ret, "%s: handle-open: open", name); + (fd = open(name, f, 0444)) == -1 ? 1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, "%s: handle-open: open", name); + WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + goto directory_open; } f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR; @@ -550,7 +590,7 @@ __posix_handle_open(WT_SESSION_IMPL *session, f |= O_SYNC; #else WT_ERR_MSG(session, ENOTSUP, - "Unsupported log sync mode requested"); + "unsupported log sync mode configured"); #endif } @@ -561,20 +601,7 @@ __posix_handle_open(WT_SESSION_IMPL *session, "%s: handle-open: open: failed with direct I/O configured, " "some filesystem types do not support direct I/O" : "%s: handle-open: open", name); - -setupfh: -#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. 
There's an obvious - * race here, so we prefer the flag to open if available. - */ - if ((f = fcntl(fd, F_GETFD)) == -1 || - fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: fcntl", name); -#endif + WT_ERR(__posix_handle_open_cloexec(session, fd, name)); /* Disable read-ahead on trees: it slows down random read workloads. */ #if defined(HAVE_POSIX_FADVISE) @@ -593,7 +620,7 @@ setupfh: dio_type == WT_FILE_TYPE_CHECKPOINT) fh->extend_len = conn->data_extend_len; - /* Optionally configure the stream API. */ + /* Optionally configure a stdio stream API. */ switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { case WT_STREAM_APPEND: stream_mode = "a"; @@ -615,6 +642,7 @@ setupfh: WT_ERR_MSG(session, __wt_errno(), "%s: handle-open: fdopen", name); +directory_open: __wt_free(session, path); fh->fd = fd; @@ -640,9 +668,8 @@ err: if (fd != -1) { if (tret != 0) __wt_err(session, tret, "%s: handle-open: close", name); } + __wt_free(session, path); - fh->fd = -1; - fh->fp = NULL; return (ret); } -- cgit v1.2.1 From f7f6b9e7f5cba5dbeb8fd365bfdbceae582155a1 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 09:15:04 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Quiet MSVC warning, use a 8B value to represent a file offset. --- src/btree/bt_vrfy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index 952298f2456..83dc7924312 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -84,7 +84,7 @@ __verify_config_offsets( WT_CONFIG list; WT_CONFIG_ITEM cval, k, v; WT_DECL_RET; - u_long offset; + uint64_t offset; *quitp = false; @@ -97,7 +97,7 @@ __verify_config_offsets( * verify because that's where we "dump blocks" for debugging.) 
*/ *quitp = true; - if (v.len != 0 || sscanf(k.str, "%lu", &offset) != 1) + if (v.len != 0 || sscanf(k.str, "%" SCNu64, &offset) != 1) WT_RET_MSG(session, EINVAL, "unexpected dump offset format"); #if !defined(HAVE_DIAGNOSTIC) -- cgit v1.2.1 From 7dc7f97d915d3686167cbbd3821d699ca7a470a5 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 09:17:53 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Quiet MSVC warning, use a (size_t) to represent a pointer difference and do an explicit cast. --- src/utilities/util_load_json.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c index 9349d39bb1e..3a1f847a95f 100644 --- a/src/utilities/util_load_json.c +++ b/src/utilities/util_load_json.c @@ -213,8 +213,7 @@ json_data(WT_SESSION *session, { WT_CURSOR *cursor; WT_DECL_RET; - size_t keystrlen; - ssize_t gotnolen; + size_t gotnolen, keystrlen; uint64_t gotno, recno; int nfield, nkeys, toktype, tret; bool isrec; @@ -274,9 +273,8 @@ json_data(WT_SESSION *session, /* Verify the dump has recnos in order. */ recno++; gotno = __wt_strtouq(ins->tokstart, &endp, 0); - gotnolen = (endp - ins->tokstart); - if (recno != gotno || - ins->toklen != (size_t)gotnolen) { + gotnolen = (size_t)(endp - ins->tokstart); + if (recno != gotno || ins->toklen != gotnolen) { ret = util_err(session, 0, "%s: recno out of order", uri); goto err; -- cgit v1.2.1 From 0a1e1b4a2a125deff8da0b7ab1946df0a320ee3c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 09:24:12 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Quiet MSVC warnings, don't mix-and-match uint32_t's with size_t's. 
--- src/reconcile/rec_write.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index a69f335c9b3..26123f6b66d 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -2409,8 +2409,8 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) /* Finalize the header information and write the page. */ dsk->recno = last->recno; dsk->u.entries = r->entries; - dsk->mem_size = - r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); + dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + r->disk_image.size = dsk->mem_size; WT_RET( __rec_split_write(session, r, last, &r->disk_image, false)); @@ -2790,9 +2790,9 @@ no_slots: WT_STAT_FAST_DATA_INCR(session, compress_raw_fail); dsk->recno = last->recno; - dsk->mem_size = - r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); + dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; + r->disk_image.size = dsk->mem_size; r->entries = 0; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); @@ -2972,7 +2972,8 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) dsk = r->disk_image.mem; dsk->recno = bnd->recno; dsk->u.entries = r->entries; - dsk->mem_size = r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); + dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + r->disk_image.size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ return (__rec_is_checkpoint(session, r, bnd) ? @@ -6086,8 +6087,9 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session, dsk->u.datalen = (uint32_t)kv->buf.size; memcpy(WT_PAGE_HEADER_BYTE(btree, dsk), kv->buf.data, kv->buf.size); - dsk->mem_size = tmp->size = + dsk->mem_size = WT_PAGE_HEADER_BYTE_SIZE(btree) + (uint32_t)kv->buf.size; + tmp->size = dsk->mem_size; /* Write the buffer. 
*/ addr = buf; -- cgit v1.2.1 From 3ad7ced19bd1bd622c0628226198eadd23021822 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 16:00:45 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Whitespace. --- src/btree/bt_handle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 1d33a7e7c9a..02eea9c2f0c 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -36,8 +36,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) btree = S2BT(session); /* Checkpoint files are readonly. */ - readonly = (dhandle->checkpoint != NULL || - F_ISSET(S2C(session), WT_CONN_READONLY)); + readonly = dhandle->checkpoint != NULL || + F_ISSET(S2C(session), WT_CONN_READONLY); /* Get the checkpoint information for this name/checkpoint pair. */ WT_CLEAR(ckpt); -- cgit v1.2.1 From a167c84fbefa2dea8539aaa68223f3bc3b944b55 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 25 Mar 2016 16:11:24 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files The __wt_open() "dio_type" has become a general file-type, it should always be specified. It remains a flag, because it shares definitions with WT_CONNECTION_IMPL fields, holding flags set during configuration. Remove the set of the file-type to WT_FILE_TYPE_CHECKPOINT in the block manager code; instead, test for a read-only data file in the open code. This means WT_FILE_TYPE_CHECKPOINT is purely a configuration flag, it's not used inside WiredTiger to specify the type of a file being opened. Review __wt_open calls, add file-types where not specified, add readonly where appropriate. Sue's review comment, change u_int flags to __wt_open to uint32_t.
--- src/block/block_open.c | 6 +++--- src/btree/bt_huffman.c | 4 ++-- src/conn/conn_api.c | 9 +++++---- src/include/connection.h | 4 ++-- src/include/extern.h | 2 +- src/meta/meta_turtle.c | 8 ++++---- src/os_posix/os_inmemory.c | 4 ++-- src/os_posix/os_open.c | 34 ++++++++++++++++++---------------- src/os_posix/os_posix.c | 20 ++++++++++---------- src/os_win/os_win.c | 23 +++++++++++------------ 10 files changed, 58 insertions(+), 56 deletions(-) diff --git a/src/block/block_open.c b/src/block/block_open.c index 4bc75c36e86..50e44aceea9 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -157,6 +157,8 @@ __wt_block_open(WT_SESSION_IMPL *session, WT_DECL_RET; uint64_t bucket, hash; + WT_UNUSED(readonly); + WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename)); conn = S2C(session); @@ -226,9 +228,7 @@ __wt_block_open(WT_SESSION_IMPL *session, #endif /* Open the underlying file handle. */ - WT_ERR(__wt_open(session, filename, - readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA, - 0, &block->fh)); + WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh)); /* Initialize the live checkpoint's lock. */ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager")); diff --git a/src/btree/bt_huffman.c b/src/btree/bt_huffman.c index 9d83968910c..a1aaf2c7ea0 100644 --- a/src/btree/bt_huffman.c +++ b/src/btree/bt_huffman.c @@ -157,8 +157,8 @@ __huffman_confchk_file( /* Check the file exists. */ WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname)); - WT_ERR(__wt_open(session, fname, - WT_FILE_TYPE_REGULAR, WT_OPEN_FIXED | WT_STREAM_READ, &fh)); + WT_ERR(__wt_open(session, fname, WT_FILE_TYPE_REGULAR, + WT_OPEN_FIXED | WT_OPEN_READONLY | WT_STREAM_READ, &fh)); /* Optionally return the file handle. 
*/ if (fhp == NULL) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 9ac77d3540e..e5e18c8eaed 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1197,7 +1197,8 @@ __conn_config_file(WT_SESSION_IMPL *session, return (0); /* Open the configuration file. */ - WT_RET(__wt_open(session, filename, 0, 0, &fh)); + WT_RET(__wt_open( + session, filename, WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &fh)); WT_ERR(__wt_filesize(session, fh, &size)); if (size == 0) goto err; @@ -1489,7 +1490,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) exist = false; if (!is_create) WT_ERR(__wt_exist(session, WT_WIREDTIGER, &exist)); - ret = __wt_open(session, WT_SINGLETHREAD, 0, + ret = __wt_open(session, WT_SINGLETHREAD, WT_FILE_TYPE_REGULAR, is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh); /* @@ -1544,8 +1545,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) } /* We own the lock file, optionally create the WiredTiger file. */ - ret = __wt_open( - session, WT_WIREDTIGER, 0, is_create ? WT_OPEN_CREATE : 0, &fh); + ret = __wt_open(session, WT_WIREDTIGER, + WT_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh); /* * If we're read-only, check for success as well as handled errors. 
diff --git a/src/include/connection.h b/src/include/connection.h index 7fcfe82f17f..c2b1dd68c18 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -438,8 +438,8 @@ struct __wt_connection_impl { int (*file_remove)(WT_SESSION_IMPL *, const char *); int (*file_rename)(WT_SESSION_IMPL *, const char *, const char *); int (*file_size)(WT_SESSION_IMPL *, const char *, bool, wt_off_t *); - int (*handle_open)( - WT_SESSION_IMPL *, WT_FH *, const char *, int, u_int); + int (*handle_open)(WT_SESSION_IMPL *, + WT_FH *, const char *, uint32_t, uint32_t); uint32_t flags; }; diff --git a/src/include/extern.h b/src/include/extern.h index 7a56d7ef242..c766a3e2c94 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -745,7 +745,7 @@ extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); extern int __wt_once(void (*init_routine)(void)); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, int dio_type, uint32_t flags, WT_FH **fhp); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp); extern int __wt_os_cleanup(WT_SESSION_IMPL *session); extern int __wt_os_init(WT_SESSION_IMPL *session); extern int __wt_os_inmemory(WT_SESSION_IMPL *session); diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index 35b6b68ec14..d958e733cbe 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -78,8 +78,8 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session) WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist)); if (!exist) return (0); - WT_RET(__wt_open(session, - WT_METADATA_BACKUP, WT_FILE_TYPE_REGULAR, WT_STREAM_READ, &fh)); + WT_RET(__wt_open(session, WT_METADATA_BACKUP, + WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh)); /* Read line pairs 
and load them into the metadata file. */ WT_ERR(__wt_scr_alloc(session, 512, &key)); @@ -257,8 +257,8 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) if (!exist) return (strcmp(key, WT_METAFILE_URI) == 0 ? __metadata_config(session, valuep) : WT_NOTFOUND); - WT_RET(__wt_open(session, - WT_METADATA_TURTLE, WT_FILE_TYPE_REGULAR, WT_STREAM_READ, &fh)); + WT_RET(__wt_open(session, WT_METADATA_TURTLE, + WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh)); /* Search for the key. */ WT_ERR(__wt_scr_alloc(session, 512, &buf)); diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index ded0e05d092..3ca1e99378b 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -383,11 +383,11 @@ err: __wt_spin_unlock(session, &im->lock); */ static int __im_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *path, int dio_type, u_int flags) + WT_FH *fh, const char *path, uint32_t file_type, uint32_t flags) { WT_UNUSED(session); WT_UNUSED(path); - WT_UNUSED(dio_type); + WT_UNUSED(file_type); WT_UNUSED(flags); fh->off = 0; diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 69390039353..5eacd69dca3 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -83,8 +83,8 @@ __wt_handle_search_unlock(WT_SESSION_IMPL *session) * Optionally output a verbose message on handle open. 
*/ static inline int -__open_verbose( - WT_SESSION_IMPL *session, const char *name, int dio_type, uint32_t flags) +__open_verbose(WT_SESSION_IMPL *session, + const char *name, uint32_t file_type, uint32_t flags) { #ifdef HAVE_VERBOSE if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS)) @@ -96,26 +96,26 @@ __open_verbose( */ WT_DECL_RET; WT_DECL_ITEM(tmp); - const char *dio_type_tag, *sep; + const char *file_type_tag, *sep; - switch (dio_type) { + switch (file_type) { case WT_FILE_TYPE_CHECKPOINT: - dio_type_tag = "checkpoint"; + file_type_tag = "checkpoint"; break; case WT_FILE_TYPE_DATA: - dio_type_tag = "data"; + file_type_tag = "data"; break; case WT_FILE_TYPE_DIRECTORY: - dio_type_tag = "directory"; + file_type_tag = "directory"; break; case WT_FILE_TYPE_LOG: - dio_type_tag = "log"; + file_type_tag = "log"; break; case WT_FILE_TYPE_REGULAR: - dio_type_tag = "regular"; + file_type_tag = "regular"; break; default: - dio_type_tag = "unknown open type"; + file_type_tag = "unknown open type"; break; } @@ -139,14 +139,14 @@ __open_verbose( ret = __wt_verbose(session, WT_VERB_FILEOPS, "%s: handle-open: type %s, flags %s", - name, dio_type_tag, (char *)tmp->data); + name, file_type_tag, (char *)tmp->data); err: __wt_scr_free(session, &tmp); return (ret); #else WT_UNUSED(session); WT_UNUSED(name); - WT_UNUSED(dio_type); + WT_UNUSED(file_type); WT_UNUSED(flags); return (0); #endif @@ -158,19 +158,21 @@ err: __wt_scr_free(session, &tmp); */ int __wt_open(WT_SESSION_IMPL *session, - const char *name, int dio_type, uint32_t flags, WT_FH **fhp) + const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *fh; bool open_called; + WT_ASSERT(session, file_type != 0); /* A file type is required. */ + conn = S2C(session); fh = NULL; open_called = false; - WT_RET(__open_verbose(session, name, dio_type, flags)); + WT_RET(__open_verbose(session, name, file_type, flags)); /* Check if the handle is already open. 
*/ if (__wt_handle_search(session, name, true, true, NULL, &fh)) { @@ -208,11 +210,11 @@ __wt_open(WT_SESSION_IMPL *session, WT_STRING_MATCH(name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); /* Call the underlying open function. */ - WT_ERR(conn->handle_open(session, fh, name, dio_type, flags)); + WT_ERR(conn->handle_open(session, fh, name, file_type, flags)); open_called = true; /* Set file sizes. */ - if (dio_type != WT_FILE_TYPE_DIRECTORY) + if (file_type != WT_FILE_TYPE_DIRECTORY) WT_ERR(fh->fh_size(session, fh, &fh->size)); /* diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index f996aa4a3d1..a358f70144f 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -505,7 +505,7 @@ __posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) */ static int __posix_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *name, int dio_type, uint32_t flags) + WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -529,7 +529,7 @@ __posix_handle_open(WT_SESSION_IMPL *session, name = path; } - if (dio_type == WT_FILE_TYPE_DIRECTORY) { + if (file_type == WT_FILE_TYPE_DIRECTORY) { f = O_RDONLY; #ifdef O_CLOEXEC /* @@ -569,7 +569,10 @@ __posix_handle_open(WT_SESSION_IMPL *session, f |= O_CLOEXEC; #endif #ifdef O_DIRECT - if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) { + if (FLD_ISSET(conn->direct_io, file_type) || + (LF_ISSET(WT_OPEN_READONLY) && + file_type == WT_FILE_TYPE_DATA && + FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { f |= O_DIRECT; direct_io = true; } @@ -577,12 +580,11 @@ __posix_handle_open(WT_SESSION_IMPL *session, fh->direct_io = direct_io; #ifdef O_NOATIME /* Avoid updating metadata for read-only workloads. 
*/ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) + if (file_type == WT_FILE_TYPE_DATA) f |= O_NOATIME; #endif - if (dio_type == WT_FILE_TYPE_LOG && + if (file_type == WT_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { #ifdef O_DSYNC f |= O_DSYNC; @@ -605,8 +607,7 @@ __posix_handle_open(WT_SESSION_IMPL *session, /* Disable read-ahead on trees: it slows down random read workloads. */ #if defined(HAVE_POSIX_FADVISE) - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) { + if (file_type == WT_FILE_TYPE_DATA) { WT_SYSCALL_RETRY( posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); if (ret != 0) @@ -616,8 +617,7 @@ __posix_handle_open(WT_SESSION_IMPL *session, #endif /* Configure file extension. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) + if (file_type == WT_FILE_TYPE_DATA) fh->extend_len = conn->data_extend_len; /* Optionally configure a stdio stream API. */ diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index cffaa7579c7..579dfa745e4 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -457,7 +457,7 @@ __win_handle_write(WT_SESSION_IMPL *session, */ static int __win_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *name, int dio_type, uint32_t flags) + WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) { DWORD dwCreationDisposition; HANDLE filehandle, filehandle_secondary; @@ -483,7 +483,7 @@ __win_handle_open(WT_SESSION_IMPL *session, * require that functionality: create an empty WT_FH structure with * invalid handles. */ - if (dio_type == WT_FILE_TYPE_DIRECTORY) + if (file_type == WT_FILE_TYPE_DIRECTORY) goto directory_open; /* Create the path to the file. */ @@ -518,30 +518,29 @@ __win_handle_open(WT_SESSION_IMPL *session, * direct_io means no OS file caching. This requires aligned buffer * allocations like O_DIRECT. 
*/ - if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) { + if (FLD_ISSET(conn->direct_io, file_type) || + (LF_ISSET(WT_OPEN_READONLY) && + file_type == WT_FILE_TYPE_DATA && + FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { f |= FILE_FLAG_NO_BUFFERING; direct_io = true; } fh->direct_io = direct_io; /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ - if (dio_type && FLD_ISSET(conn->write_through, dio_type)) { + if (FLD_ISSET(conn->write_through, file_type)) f |= FILE_FLAG_WRITE_THROUGH; - } - if (dio_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { + if (file_type == WT_FILE_TYPE_LOG && + FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) f |= FILE_FLAG_WRITE_THROUGH; - } /* Disable read-ahead on trees: it slows down random read workloads. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) + if (file_type == WT_FILE_TYPE_DATA) f |= FILE_FLAG_RANDOM_ACCESS; /* Configure file extension. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) + if (file_type == WT_FILE_TYPE_DATA) fh->extend_len = conn->data_extend_len; filehandle = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, -- cgit v1.2.1 From b88125dd1934751cfe10151825c09f99a7468672 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 26 Mar 2016 13:32:05 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files KNF. 
--- src/log/log_slot.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/log/log_slot.c b/src/log/log_slot.c index 570d1c9ce48..b78ef9fe4a7 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -97,14 +97,12 @@ retry: end_offset = WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered; slot->slot_end_lsn.l.offset += end_offset; - WT_STAT_FAST_CONN_INCRV(session, - log_slot_consolidated, end_offset); + WT_STAT_FAST_CONN_INCRV(session, log_slot_consolidated, end_offset); /* * XXX Would like to change so one piece of code advances the LSN. */ log->alloc_lsn = slot->slot_end_lsn; - WT_ASSERT(session, - log->alloc_lsn.l.file >= log->write_lsn.l.file); + WT_ASSERT(session, log->alloc_lsn.l.file >= log->write_lsn.l.file); return (0); } -- cgit v1.2.1 From 87f40ed3a1bcbb163c9a3c2b7f6189ad3a6a493e Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 26 Mar 2016 13:34:23 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Move WT_FH.{size,extend_size,extend_len} into WT_BLOCK, there's no reason to split the field use across the OS and block-manager layers, and the move simplifies the OS layer. 
--- src/block/block_addr.c | 2 +- src/block/block_ckpt.c | 6 +++--- src/block/block_compact.c | 23 +++++++++-------------- src/block/block_ext.c | 25 +++++++++---------------- src/block/block_open.c | 10 ++++++++-- src/block/block_slvg.c | 12 ++++++------ src/block/block_vrfy.c | 6 +++--- src/block/block_write.c | 22 +++++++++++----------- src/include/block.h | 7 ++++++- src/include/extern.h | 2 +- src/include/os.h | 4 ---- src/log/log.c | 11 ++++++----- src/os_posix/os_map.c | 23 ++++++++++++----------- src/os_posix/os_open.c | 4 ---- src/os_posix/os_posix.c | 4 ---- src/os_win/os_map.c | 25 +++++++++++++------------ src/os_win/os_win.c | 4 ---- 17 files changed, 88 insertions(+), 102 deletions(-) diff --git a/src/block/block_addr.c b/src/block/block_addr.c index b1f2fd9454a..d8cc1d627cf 100644 --- a/src/block/block_addr.c +++ b/src/block/block_addr.c @@ -112,7 +112,7 @@ __wt_block_addr_invalid(WT_SESSION_IMPL *session, #endif /* Check if the address is past the end of the file. */ - return (offset + size > block->fh->size ? EINVAL : 0); + return (offset + size > block->size ? EINVAL : 0); } /* diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c index 812bf99acfb..a0aadb43b93 100644 --- a/src/block/block_ckpt.c +++ b/src/block/block_ckpt.c @@ -144,7 +144,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ERR(__wt_verbose(session, WT_VERB_CHECKPOINT, "truncate file to %" PRIuMAX, (uintmax_t)ci->file_size)); WT_ERR_BUSY_OK( - __wt_block_truncate(session, block->fh, ci->file_size)); + __wt_block_truncate(session, block, ci->file_size)); } if (0) { @@ -192,7 +192,7 @@ __wt_block_checkpoint_unload( * an open checkpoint on the file), that's OK. 
*/ WT_TRET_BUSY_OK( - __wt_block_truncate(session, block->fh, block->fh->size)); + __wt_block_truncate(session, block, block->size)); __wt_spin_lock(session, &block->live_lock); __wt_block_ckpt_destroy(session, &block->live); @@ -738,7 +738,7 @@ __ckpt_update(WT_SESSION_IMPL *session, * if there ever is, this will need to be fixed. */ if (is_live) - ci->file_size = block->fh->size; + ci->file_size = block->size; /* * Copy the checkpoint information into the checkpoint array's address diff --git a/src/block/block_compact.c b/src/block/block_compact.c index 8c9be4f029c..24ca6632311 100644 --- a/src/block/block_compact.c +++ b/src/block/block_compact.c @@ -59,20 +59,17 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) WT_DECL_RET; WT_EXT *ext; WT_EXTLIST *el; - WT_FH *fh; wt_off_t avail_eighty, avail_ninety, eighty, ninety; *skipp = true; /* Return a default skip. */ - fh = block->fh; - /* * We do compaction by copying blocks from the end of the file to the * beginning of the file, and we need some metrics to decide if it's * worth doing. Ignore small files, and files where we are unlikely * to recover 10% of the file. */ - if (fh->size <= WT_MEGABYTE) + if (block->size <= WT_MEGABYTE) return (0); /* @@ -93,8 +90,8 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) /* Sum the available bytes in the initial 80% and 90% of the file. */ avail_eighty = avail_ninety = 0; - ninety = fh->size - fh->size / 10; - eighty = fh->size - ((fh->size / 10) * 2); + ninety = block->size - block->size / 10; + eighty = block->size - ((block->size / 10) * 2); el = &block->live.avail; WT_EXT_FOREACH(ext, el->off) @@ -117,11 +114,11 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) * less useful. 
*/ if (avail_eighty > WT_MEGABYTE && - avail_eighty >= ((fh->size / 10) * 2)) { + avail_eighty >= ((block->size / 10) * 2)) { *skipp = false; block->compact_pct_tenths = 2; } else if (avail_ninety > WT_MEGABYTE && - avail_ninety >= fh->size / 10) { + avail_ninety >= block->size / 10) { *skipp = false; block->compact_pct_tenths = 1; } @@ -140,7 +137,8 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first " "90%% of the file to perform compaction, compaction %s", block->name, - (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10, + (uintmax_t)(block->size / 10) / WT_MEGABYTE, + (uintmax_t)block->size / 10, *skipp ? "skipped" : "proceeding")); err: __wt_spin_unlock(session, &block->live_lock); @@ -159,15 +157,12 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_DECL_RET; WT_EXT *ext; WT_EXTLIST *el; - WT_FH *fh; wt_off_t limit, offset; uint32_t size, cksum; WT_UNUSED(addr_size); *skipp = true; /* Return a default skip. */ - fh = block->fh; - /* Crack the cookie. */ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); @@ -179,7 +174,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, * there's an obvious race if the file is sufficiently busy. 
*/ __wt_spin_lock(session, &block->live_lock); - limit = fh->size - ((fh->size / 10) * block->compact_pct_tenths); + limit = block->size - ((block->size / 10) * block->compact_pct_tenths); if (offset > limit) { el = &block->live.avail; WT_EXT_FOREACH(ext, el->off) { @@ -217,7 +212,7 @@ __block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start) u_int i; el = &block->live.avail; - size = block->fh->size; + size = block->size; WT_RET(__wt_verbose(session, WT_VERB_COMPACT, "============ %s", diff --git a/src/block/block_ext.c b/src/block/block_ext.c index ab5d5604087..caafcc77c48 100644 --- a/src/block/block_ext.c +++ b/src/block/block_ext.c @@ -462,17 +462,13 @@ static inline int __block_extend( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size) { - WT_FH *fh; - - fh = block->fh; - /* * Callers of this function are expected to have already acquired any * locks required to extend the file. * * We should never be allocating from an empty file. */ - if (fh->size < block->allocsize) + if (block->size < block->allocsize) WT_RET_MSG(session, EINVAL, "file has no description information"); @@ -482,12 +478,12 @@ __block_extend( * 8B bits (we currently check an wt_off_t is 8B in verify_build.h). I * don't think we're likely to see anything bigger for awhile. */ - if (fh->size > (wt_off_t)INT64_MAX - size) + if (block->size > (wt_off_t)INT64_MAX - size) WT_RET_MSG(session, WT_ERROR, "block allocation failed, file cannot grow further"); - *offp = fh->size; - fh->size += size; + *offp = block->size; + block->size += size; WT_STAT_FAST_DATA_INCR(session, block_extension); WT_RET(__wt_verbose(session, WT_VERB_BLOCK, @@ -1343,19 +1339,16 @@ __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el) { WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH]; - WT_FH *fh; wt_off_t orig, size; - fh = block->fh; - /* * Check if the last available extent is at the end of the file, and if * so, truncate the file and discard the extent. 
*/ if ((ext = __block_off_srch_last(el->off, astack)) == NULL) return (0); - WT_ASSERT(session, ext->off + ext->size <= fh->size); - if (ext->off + ext->size < fh->size) + WT_ASSERT(session, ext->off + ext->size <= block->size); + if (ext->off + ext->size < block->size) return (0); /* @@ -1363,10 +1356,10 @@ __wt_block_extlist_truncate( * the cached file size, and that can't happen until after the extent * list removal succeeds.) */ - orig = fh->size; + orig = block->size; size = ext->off; WT_RET(__block_off_remove(session, block, el, size, NULL)); - fh->size = size; + block->size = size; /* * Truncate the file. The truncate might fail if there's a file mapping @@ -1376,7 +1369,7 @@ __wt_block_extlist_truncate( WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "truncate file from %" PRIdMAX " to %" PRIdMAX, (intmax_t)orig, (intmax_t)size)); - WT_RET_BUSY_OK(__wt_block_truncate(session, block->fh, size)); + WT_RET_BUSY_OK(__wt_block_truncate(session, block, size)); return (0); } diff --git a/src/block/block_open.c b/src/block/block_open.c index 50e44aceea9..fd0e60bba45 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -230,6 +230,12 @@ __wt_block_open(WT_SESSION_IMPL *session, /* Open the underlying file handle. */ WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh)); + /* Set the file's size. */ + WT_ERR(__wt_filesize(session, block->fh, &block->size)); + + /* Set the file extension information. */ + block->extend_len = conn->data_extend_len; + /* Initialize the live checkpoint's lock. 
*/ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager")); @@ -406,7 +412,7 @@ __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats) WT_STAT_WRITE(stats, block_minor, WT_BLOCK_MINOR_VERSION); WT_STAT_WRITE( stats, block_reuse_bytes, (int64_t)block->live.avail.bytes); - WT_STAT_WRITE(stats, block_size, block->fh->size); + WT_STAT_WRITE(stats, block_size, block->size); } /* @@ -418,7 +424,7 @@ __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) { WT_UNUSED(session); - *sizep = bm->block->fh == NULL ? 0 : bm->block->fh->size; + *sizep = bm->block->size; return (0); } diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c index a8cccd53023..a8fd23f25c0 100644 --- a/src/block/block_slvg.c +++ b/src/block/block_slvg.c @@ -33,10 +33,10 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) * Truncate the file to an allocation-size multiple of blocks (bytes * trailing the last block must be garbage, by definition). */ - if (block->fh->size > allocsize) { - len = (block->fh->size / allocsize) * allocsize; - if (len != block->fh->size) - WT_RET(__wt_block_truncate(session, block->fh, len)); + if (block->size > allocsize) { + len = (block->size / allocsize) * allocsize; + if (len != block->size) + WT_RET(__wt_block_truncate(session, block, len)); } else len = allocsize; block->live.file_size = len; @@ -83,7 +83,7 @@ __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size) if (size > WT_BTREE_PAGE_SIZE_MAX) /* > maximum page size */ return (true); /* past end-of-file */ - if (offset + (wt_off_t)size > block->fh->size) + if (offset + (wt_off_t)size > block->size) return (true); return (false); } @@ -111,7 +111,7 @@ __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_ERR(__wt_scr_alloc(session, allocsize, &tmp)); /* Read through the file, looking for pages. 
*/ - for (max = fh->size;;) { + for (max = block->size;;) { offset = block->slvg_off; if (offset >= max) { /* Check eof. */ *eofp = 1; diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c index 35c7a2c218c..b31a7b8a1b6 100644 --- a/src/block/block_vrfy.c +++ b/src/block/block_vrfy.c @@ -57,7 +57,7 @@ __wt_block_verify_start(WT_SESSION_IMPL *session, * a file immediately after creation or the checkpoint doesn't reflect * any of the data pages). */ - size = block->fh->size; + size = block->size; if (size <= block->allocsize) return (0); @@ -156,7 +156,7 @@ __verify_last_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt) ci = &_ci; WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name)); WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci)); - WT_ERR(__wt_block_truncate(session, block->fh, ci->file_size)); + WT_ERR(__wt_block_truncate(session, block, ci->file_size)); err: __wt_block_ckpt_destroy(session, ci); return (ret); @@ -368,7 +368,7 @@ __verify_filefrag_add(WT_SESSION_IMPL *session, WT_BLOCK *block, (uintmax_t)offset, (uintmax_t)(offset + size), (uintmax_t)size)); /* Check each chunk against the total file size. */ - if (offset + size > block->fh->size) + if (offset + size > block->size) WT_RET_MSG(session, WT_ERROR, "fragment %" PRIuMAX "-%" PRIuMAX " references " "non-existent file blocks", diff --git a/src/block/block_write.c b/src/block/block_write.c index 49da084cee2..a56893aee23 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -25,11 +25,11 @@ __wt_block_header(WT_BLOCK *block) * Truncate the file. 
*/ int -__wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) { - WT_RET(__wt_ftruncate(session, fh, len)); + WT_RET(__wt_ftruncate(session, block->fh, len)); - fh->size = fh->extend_size = len; + block->size = block->extend_size = len; return (0); } @@ -61,7 +61,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, locked = true; /* If not configured to extend the file, we're done. */ - if (fh->extend_len == 0) + if (block->extend_len == 0) return (0); /* @@ -73,9 +73,9 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * why there's a check in case the extended file size becomes too small: * if the file size catches up, every thread tries to extend it. */ - if (fh->extend_size > fh->size && - (offset > fh->extend_size || - offset + fh->extend_len + (wt_off_t)align_size < fh->extend_size)) + if (block->extend_size > block->size && + (offset > block->extend_size || offset + + block->extend_len + (wt_off_t)align_size < block->extend_size)) return (0); /* @@ -108,9 +108,9 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * and that's OK, we simply may do another extension sooner than * otherwise. */ - fh->extend_size = fh->size + fh->extend_len * 2; + block->extend_size = block->size + block->extend_len * 2; if ((ret = __wt_fallocate( - session, fh, fh->size, fh->extend_len * 2)) == 0) + session, fh, block->size, block->extend_len * 2)) == 0) return (0); if (ret != ENOTSUP) return (ret); @@ -130,13 +130,13 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * extend length after locking so we don't overwrite already-written * blocks. */ - fh->extend_size = fh->size + fh->extend_len * 2; + block->extend_size = block->size + block->extend_len * 2; /* * The truncate might fail if there's a mapped file (in other words, if * there's an open checkpoint on the file), that's OK. 
*/ - if ((ret = __wt_ftruncate(session, fh, fh->extend_size)) == EBUSY) + if ((ret = __wt_ftruncate(session, fh, block->extend_size)) == EBUSY) ret = 0; return (ret); } diff --git a/src/include/block.h b/src/include/block.h index 10efd35086c..cdcf82cde11 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -217,10 +217,15 @@ struct __wt_block { /* A list of block manager handles, sharing a file descriptor. */ uint32_t ref; /* References */ - WT_FH *fh; /* Backing file handle */ TAILQ_ENTRY(__wt_block) q; /* Linked list of handles */ TAILQ_ENTRY(__wt_block) hashq; /* Hashed list of handles */ + WT_FH *fh; /* Backing file handle */ + wt_off_t size; /* File size */ + + wt_off_t extend_size; /* File extended size */ + wt_off_t extend_len; /* File extend chunk size */ + /* Configuration information, set when the file is opened. */ uint32_t allocfirst; /* Allocation is first-fit */ uint32_t allocsize; /* Allocation size */ diff --git a/src/include/extern.h b/src/include/extern.h index c766a3e2c94..02bcb863b20 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -74,7 +74,7 @@ extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size); extern u_int __wt_block_header(WT_BLOCK *block); -extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len); +extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len); extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep); extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum); extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, 
bool caller_locked); diff --git a/src/include/os.h b/src/include/os.h index 0e27ea96d23..08fcc3db7e9 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -85,10 +85,6 @@ struct __wt_fh { HANDLE filehandle_secondary; /* Windows file handle for file size changes */ #endif - wt_off_t size; /* File size */ - wt_off_t extend_size; /* File extended size */ - wt_off_t extend_len; /* File extend chunk size */ - /* * Underlying in-memory handle support. */ diff --git a/src/log/log.c b/src/log/log.c index ab8e2a4f2de..1132b54f335 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -1204,18 +1204,18 @@ __wt_log_close(WT_SESSION_IMPL *session) * file is zeroes. */ static int -__log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, bool *hole) +__log_has_hole(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t log_size, wt_off_t offset, bool *hole) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LOG *log; - wt_off_t log_size, off, remainder; + wt_off_t off, remainder; size_t bufsz, rdlen; char *buf, *zerobuf; conn = S2C(session); log = conn->log; - log_size = fh->size; remainder = log_size - offset; *hole = false; @@ -1559,7 +1559,8 @@ advance: * See if there is anything non-zero at the * end of this log file. */ - WT_ERR(__log_has_hole(session, log_fh, + WT_ERR(__log_has_hole( + session, log_fh, log_size, rd_lsn.l.offset, &partial_record)); /* * If we read the last record, go to the next file. @@ -1623,7 +1624,7 @@ advance: */ if (reclen == 0) { WT_ERR(__log_has_hole( - session, log_fh, rd_lsn.l.offset, &eol)); + session, log_fh, log_size, rd_lsn.l.offset, &eol)); if (eol) /* Found a hole. This LSN is the end. 
*/ break; diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 7abd50e89f9..03daba698a8 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -16,22 +16,23 @@ int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) { + size_t len; void *map; - size_t orig_size; + wt_off_t file_size; WT_UNUSED(mappingcookie); WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); /* - * Record the current size and only map and set that as the length, it - * could change between the map call and when we set the return length. - * For the same reason we could actually map past the end of the file; - * we don't read bytes past the end of the file though, so as long as - * the map call succeeds, it's all OK. + * There's no locking here to prevent the underlying file from changing + * underneath us, our caller needs to ensure consistency of the mapped + * region vs. any other file activity. */ - orig_size = (size_t)fh->size; - if ((map = mmap(NULL, orig_size, + WT_RET(__wt_filesize(session, fh, &file_size)); + len = (size_t)file_size; + + if ((map = mmap(NULL, len, PROT_READ, #ifdef MAP_NOCORE MAP_NOCORE | @@ -40,13 +41,13 @@ __wt_mmap(WT_SESSION_IMPL *session, fh->fd, (wt_off_t)0)) == MAP_FAILED) { WT_RET_MSG(session, __wt_errno(), "%s map error: failed to map %" WT_SIZET_FMT " bytes", - fh->name, orig_size); + fh->name, len); } (void)__wt_verbose(session, WT_VERB_FILEOPS, - "%s: map %p: %" WT_SIZET_FMT " bytes", fh->name, map, orig_size); + "%s: map %p: %" WT_SIZET_FMT " bytes", fh->name, map, len); *(void **)mapp = map; - *lenp = orig_size; + *lenp = len; return (0); } diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 5eacd69dca3..de27ab83667 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -213,10 +213,6 @@ __wt_open(WT_SESSION_IMPL *session, WT_ERR(conn->handle_open(session, fh, name, file_type, flags)); open_called = true; - /* Set file sizes. 
*/ - if (file_type != WT_FILE_TYPE_DIRECTORY) - WT_ERR(fh->fh_size(session, fh, &fh->size)); - /* * Repeat the check for a match: if there's no match, link our newly * created handle onto the database's list of files. diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index a358f70144f..c6c03ca7502 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -616,10 +616,6 @@ __posix_handle_open(WT_SESSION_IMPL *session, } #endif - /* Configure file extension. */ - if (file_type == WT_FILE_TYPE_DATA) - fh->extend_len = conn->data_extend_len; - /* Optionally configure a stdio stream API. */ switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { case WT_STREAM_APPEND: diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index c41e0f83ae7..74fb3c4ecb4 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -16,40 +16,41 @@ int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) { + size_t len; void *map; - size_t orig_size; + wt_off_t file_size; /* - * Record the current size and only map and set that as the length, it - * could change between the map call and when we set the return length. - * For the same reason we could actually map past the end of the file; - * we don't read bytes past the end of the file though, so as long as - * the map call succeeds, it's all OK. + * There's no locking here to prevent the underlying file from changing + * underneath us, our caller needs to ensure consistency of the mapped + * region vs. any other file activity. 
*/ - orig_size = (size_t)fh->size; + WT_RET(__wt_filesize(session, fh, &file_size)); + len = (size_t)file_size; + *mappingcookie = CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); if (*mappingcookie == NULL) WT_RET_MSG(session, __wt_win32_errno(), "%s CreateFileMapping error: failed to map %" WT_SIZET_FMT " bytes", - fh->name, orig_size); + fh->name, len); if ((map = MapViewOfFile( - *mappingcookie, FILE_MAP_READ, 0, 0, orig_size)) == NULL) { + *mappingcookie, FILE_MAP_READ, 0, 0, len)) == NULL) { CloseHandle(*mappingcookie); *mappingcookie = NULL; WT_RET_MSG(session, __wt_win32_errno(), "%s map error: failed to map %" WT_SIZET_FMT " bytes", - fh->name, orig_size); + fh->name, len); } (void)__wt_verbose(session, WT_VERB_FILEOPS, "%s: MapViewOfFile %p: %" WT_SIZET_FMT " bytes", - fh->name, map, orig_size); + fh->name, map, len); *(void **)mapp = map; - *lenp = orig_size; + *lenp = len; return (0); } diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 579dfa745e4..88ca187ae4b 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -539,10 +539,6 @@ __win_handle_open(WT_SESSION_IMPL *session, if (file_type == WT_FILE_TYPE_DATA) f |= FILE_FLAG_RANDOM_ACCESS; - /* Configure file extension. */ - if (file_type == WT_FILE_TYPE_DATA) - fh->extend_len = conn->data_extend_len; - filehandle = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, share_mode, NULL, dwCreationDisposition, f, NULL); if (filehandle == INVALID_HANDLE_VALUE) { -- cgit v1.2.1 From af008788393783f59d501bcb5c0b6465c5eb8a3b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 26 Mar 2016 14:43:00 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Use #ifdef instead of #ifndef. 
--- src/include/os.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/include/os.h b/src/include/os.h index 08fcc3db7e9..2ff78b2777f 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -77,14 +77,15 @@ struct __wt_fh { /* * Underlying file system handle support. */ - FILE *fp; /* ANSI C file handle */ -#ifndef _WIN32 - int fd; /* POSIX file handle */ -#else +#ifdef _WIN32 HANDLE filehandle; /* Windows file handle */ HANDLE filehandle_secondary; /* Windows file handle for file size changes */ +#else + int fd; /* POSIX file handle */ #endif + FILE *fp; /* ANSI C stdio handle */ + /* * Underlying in-memory handle support. */ -- cgit v1.2.1 From 9240bb3d52c4b0c724c78be268540bc12f54d3d2 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 26 Mar 2016 18:09:32 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Remove Linux-specific preload and memory-mapping code from the block manager. Get rid of the block-manager calls to posix_fadvise(), support ENOTSUP returns up the stack from the underlying support. General reworking of block preload/discard code to stop attempts whenever ENOTSUP is returned on a handle. Remove block manager handle open tests of cache-max and dirty-cache-max configuration conflicts with specifying direct I/O, lacking posix_fadvise and lacking sync_file_range. We can't know in the block manager what the underlying layer can do, and also, the old code was wrong, direct I/O is set on a file type basis, so testing against a specific handle open isn't correct. Inline the block-header-size function, we call it a lot and it's simply returning a size.
--- dist/s_define.list | 1 + dist/s_string.ok | 3 ++ src/block/block_map.c | 8 ------ src/block/block_open.c | 29 ++----------------- src/block/block_read.c | 72 ++++++++++++++++++++++------------------------ src/block/block_write.c | 56 ++++++++++++++++++++++-------------- src/btree/bt_discard.c | 3 +- src/include/block.h | 14 ++++++++- src/include/extern.h | 7 +++-- src/include/msvc.h | 11 +++++++ src/os_posix/os_inmemory.c | 2 +- src/os_posix/os_map.c | 60 +++++++++++++++++++++++++++----------- src/os_posix/os_posix.c | 19 +++++++++++- src/os_win/os_map.c | 12 +++++--- src/os_win/os_win.c | 6 +++- 15 files changed, 181 insertions(+), 122 deletions(-) diff --git a/dist/s_define.list b/dist/s_define.list index e3f0dc7f181..c9777c86675 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -16,6 +16,7 @@ WIN32_LEAN_AND_MEAN WT_ATOMIC_CAS WT_ATOMIC_FUNC WT_BLOCK_DESC_SIZE +WT_BLOCK_HEADER_SIZE WT_CACHE_LINE_ALIGNMENT WT_COMPILER_TYPE_ALIGN WT_CONN_CHECK_PANIC diff --git a/dist/s_string.ok b/dist/s_string.ok index 17b5d54c7a1..ba826339b9f 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -77,6 +77,7 @@ DESC DHANDLE DNE DOI +DONTNEED DUPLICATEV DbCursor DbEnv @@ -189,6 +190,7 @@ LoadLoad LockFile Lookaside Lookup +MADV MALLOC MEM MEMALIGN @@ -338,6 +340,7 @@ Vixie Vo VxWorks WAL +WILLNEED WIREDTIGER WRLSN WRNOLOCK diff --git a/src/block/block_map.c b/src/block/block_map.c index b60623a37d8..c21ca9cfa19 100644 --- a/src/block/block_map.c +++ b/src/block/block_map.c @@ -41,14 +41,6 @@ __wt_block_map( if (block->verify) return (0); - /* - * Turn off mapping when direct I/O is configured for the file, the - * Linux open(2) documentation says applications should avoid mixing - * mmap(2) of files with direct I/O to the same files. - */ - if (block->fh->direct_io) - return (0); - /* * Turn off mapping if the application configured a cache size maximum, * we can't control how much of the cache size we use in that case. 
diff --git a/src/block/block_open.c b/src/block/block_open.c index fd0e60bba45..0fef6ad0e66 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -196,36 +196,10 @@ __wt_block_open(WT_SESSION_IMPL *session, /* Configuration: optional OS buffer cache maximum size. */ WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval)); block->os_cache_max = (size_t)cval.val; -#ifdef HAVE_POSIX_FADVISE - if (conn->direct_io && block->os_cache_max) - WT_ERR_MSG(session, EINVAL, - "os_cache_max not supported in combination with direct_io"); -#else - if (block->os_cache_max) - WT_ERR_MSG(session, EINVAL, - "os_cache_max not supported if posix_fadvise not " - "available"); -#endif /* Configuration: optional immediate write scheduling flag. */ WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval)); block->os_cache_dirty_max = (size_t)cval.val; -#ifdef HAVE_SYNC_FILE_RANGE - if (conn->direct_io && block->os_cache_dirty_max) - WT_ERR_MSG(session, EINVAL, - "os_cache_dirty_max not supported in combination with " - "direct_io"); -#else - if (block->os_cache_dirty_max) { - /* - * Ignore any setting if it is not supported. - */ - block->os_cache_dirty_max = 0; - WT_ERR(__wt_verbose(session, WT_VERB_BLOCK, - "os_cache_dirty_max ignored when sync_file_range not " - "available")); - } -#endif /* Open the underlying file handle. */ WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh)); @@ -236,6 +210,9 @@ __wt_block_open(WT_SESSION_IMPL *session, /* Set the file extension information. */ block->extend_len = conn->data_extend_len; + /* Set the preload availability. */ + block->preload_available = true; + /* Initialize the live checkpoint's lock. 
*/ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager")); diff --git a/src/block/block_read.c b/src/block/block_read.c index 9386974238d..d0522e155fc 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -26,33 +26,41 @@ __wt_bm_preload( WT_UNUSED(addr_size); block = bm->block; - /* - * Turn off pre-load when direct I/O is configured for the file, - * the kernel cache isn't interesting. - */ - if (block->fh->direct_io) - return (0); - WT_STAT_FAST_CONN_INCR(session, block_preload); - /* Crack the cookie. */ - WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); - - /* Check for a mapped block. */ - mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; - if (mapped) - return (__wt_mmap_preload( - session, (uint8_t *)bm->map + offset, size)); + /* Preload the block. */ + if (block->preload_available) { + /* Crack the cookie. */ + WT_RET(__wt_block_buffer_to_addr( + block, addr, &offset, &size, &cksum)); + + mapped = bm->map != NULL && + offset + size <= (wt_off_t)bm->maplen; + if (mapped) + ret = __wt_mmap_preload(session, + block->fh, (uint8_t *)bm->map + offset, size); + else + ret = __wt_posix_fadvise(session, + block->fh, (wt_off_t)offset, + (wt_off_t)size, POSIX_FADV_WILLNEED); + if (ret == 0) + return (0); -#ifdef HAVE_POSIX_FADVISE - if (__wt_posix_fadvise(session, block->fh, - (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED) == 0) - return (0); -#endif + /* Ignore ENOTSUP, but don't try again. */ + if (ret != ENOTSUP) + return (ret); + block->preload_available = false; + } - WT_RET(__wt_scr_alloc(session, size, &tmp)); - ret = __wt_block_read_off(session, block, tmp, offset, size, cksum); + /* + * If preload isn't supported, do it the slow way; don't call the + * underlying read routine directly, we don't know for certain if + * this is a mapped range. 
+ */ + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + ret = __wt_bm_read(bm, session, tmp, addr, addr_size); __wt_scr_free(session, &tmp); + return (ret); } @@ -82,7 +90,9 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, if (mapped) { buf->data = (uint8_t *)bm->map + offset; buf->size = size; - WT_RET(__wt_mmap_preload(session, buf->data, buf->size)); + if (block->preload_available) + WT_RET(__wt_mmap_preload( + session, block->fh, buf->data, buf->size)); WT_STAT_FAST_CONN_INCR(session, block_map_read); WT_STAT_FAST_CONN_INCRV(session, block_byte_map_read, size); @@ -100,21 +110,9 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, /* Read the block. */ WT_RET(__wt_block_read_off(session, block, buf, offset, size, cksum)); -#ifdef HAVE_POSIX_FADVISE /* Optionally discard blocks from the system's buffer cache. */ - if (block->os_cache_max != 0 && - (block->os_cache += size) > block->os_cache_max) { - WT_DECL_RET; - - block->os_cache = 0; - /* Ignore EINVAL - some file systems don't support the flag. */ - if ((ret = __wt_posix_fadvise(session, block->fh, - (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)) != 0 && - ret != EINVAL) - WT_RET_MSG( - session, ret, "%s: posix_fadvise", block->name); - } -#endif + WT_RET(__wt_block_discard(session, block, (size_t)size)); + return (0); } diff --git a/src/block/block_write.c b/src/block/block_write.c index a56893aee23..d6599d81a8e 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -9,28 +9,47 @@ #include "wt_internal.h" /* - * __wt_block_header -- - * Return the size of the block-specific header. + * __wt_block_truncate -- + * Truncate the file. */ -u_int -__wt_block_header(WT_BLOCK *block) +int +__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) { - WT_UNUSED(block); + WT_RET(__wt_ftruncate(session, block->fh, len)); + + block->size = block->extend_size = len; - return ((u_int)WT_BLOCK_HEADER_SIZE); + return (0); } /* - * __wt_block_truncate -- - * Truncate the file. 
+ * __wt_block_discard -- + * Discard blocks from the system buffer cache. */ int -__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) +__wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) { - WT_RET(__wt_ftruncate(session, block->fh, len)); + WT_DECL_RET; - block->size = block->extend_size = len; + if (block->os_cache_max == 0) + return (0); + + /* + * We're racing on the addition, but I'm not willing to serialize on it + * in the standard read path without more evidence it's needed. + */ + if ((block->os_cache += added_size) <= block->os_cache_max) + return (0); + block->os_cache = 0; + WT_ERR(__wt_posix_fadvise(session, block->fh, + (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)); + return (0); + +err: /* Ignore ENOTSUP, but don't try again. */ + if (ret != ENOTSUP) + return (ret); + block->os_cache_max = 0; return (0); } @@ -330,17 +349,10 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_RET(__wt_fsync(session, fh, false)); } #endif -#ifdef HAVE_POSIX_FADVISE - /* Optionally discard blocks from the system buffer cache. */ - if (block->os_cache_max != 0 && - (block->os_cache += align_size) > block->os_cache_max) { - block->os_cache = 0; - if ((ret = __wt_posix_fadvise(session, fh, - (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)) != 0) - WT_RET_MSG( - session, ret, "%s: posix_fadvise", block->name); - } -#endif + + /* Optionally discard blocks from the buffer cache.
*/ + WT_RET(__wt_block_discard(session, block, align_size)); + WT_STAT_FAST_CONN_INCR(session, block_write); WT_STAT_FAST_CONN_INCRV(session, block_byte_write, align_size); diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index 1f739c9572e..5983c7e4f18 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -134,7 +134,8 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) __wt_overwrite_and_free_len(session, dsk, dsk->mem_size); if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) - (void)__wt_mmap_discard(session, dsk, dsk->mem_size); + (void)__wt_mmap_discard( + session, S2BT(session)->bm->block->fh, dsk, dsk->mem_size); __wt_overwrite_and_free(session, page); } diff --git a/src/include/block.h b/src/include/block.h index cdcf82cde11..76891e1e5f6 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -222,9 +222,9 @@ struct __wt_block { WT_FH *fh; /* Backing file handle */ wt_off_t size; /* File size */ - wt_off_t extend_size; /* File extended size */ wt_off_t extend_len; /* File extend chunk size */ + bool preload_available; /* File pages can be preloaded. */ /* Configuration information, set when the file is opened. */ uint32_t allocfirst; /* Allocation is first-fit */ @@ -404,3 +404,15 @@ __wt_block_header_byteswap(WT_BLOCK_HEADER *blk) */ #define WT_BLOCK_COMPRESS_SKIP 64 #define WT_BLOCK_ENCRYPT_SKIP WT_BLOCK_HEADER_BYTE_SIZE + +/* + * __wt_block_header -- + * Return the size of the block-specific header. 
+ */ +static inline u_int +__wt_block_header(WT_BLOCK *block) +{ + WT_UNUSED(block); + + return ((u_int)WT_BLOCK_HEADER_SIZE); +} diff --git a/src/include/extern.h b/src/include/extern.h index 02bcb863b20..ca3f19c14f1 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -73,8 +73,8 @@ extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci); extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size); -extern u_int __wt_block_header(WT_BLOCK *block); extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len); +extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size); extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep); extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum); extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool caller_locked); @@ -741,8 +741,9 @@ extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, con extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); extern int __wt_map_error_rdonly(int error); extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); -extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); -extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); +extern int __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); +extern int __wt_mmap_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t 
size); +extern int __wt_mmap_preload_madvise( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); extern int __wt_once(void (*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp); diff --git a/src/include/msvc.h b/src/include/msvc.h index d5be5bd8c60..222c24c3bc6 100644 --- a/src/include/msvc.h +++ b/src/include/msvc.h @@ -16,6 +16,17 @@ #define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */ #define WT_SIZET_FMT "Iu" /* size_t format string */ +/* + * The Windows fadvise calls will return ENOTSUP, but the WiredTiger code + * currently uses POSIX flags in the API. + */ +#ifndef POSIX_FADV_DONTNEED +#define POSIX_FADV_DONTNEED 0 +#endif +#ifndef POSIX_FADV_WILLNEED +#define POSIX_FADV_WILLNEED 0 +#endif + /* * Add MSVC-specific attributes and pragmas to types and function declarations. */ diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index 3ca1e99378b..4d5b35e1499 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -173,7 +173,7 @@ __im_handle_advise(WT_SESSION_IMPL *session, WT_UNUSED(offset); WT_UNUSED(len); WT_UNUSED(advice); - return (0); + return (ENOTSUP); } /* diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 03daba698a8..bb0757d1595 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -24,6 +24,14 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + /* + * Mapping isn't possible if direct I/O configured for the file, the + * Linux open(2) documentation says applications should avoid mixing + * mmap(2) of files with direct I/O to the same files. 
+ */ + if (fh->direct_io) + return (ENOTSUP); + /* * There's no locking here to prevent the underlying file from changing * underneath us, our caller needs to ensure consistency of the mapped @@ -51,24 +59,24 @@ __wt_mmap(WT_SESSION_IMPL *session, return (0); } +#ifdef HAVE_POSIX_MADVISE /* - * __wt_mmap_preload -- + * __wt_mmap_preload_madvise -- * Cause a section of a memory map to be faulted in. */ int -__wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) +__wt_mmap_preload_madvise( + WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) { -#ifdef HAVE_POSIX_MADVISE - /* Linux requires the address be aligned to a 4KB boundary. */ WT_BM *bm; WT_CONNECTION_IMPL *conn; WT_DECL_RET; void *blk; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - conn = S2C(session); bm = S2BT(session)->bm; + + /* Linux requires the address be aligned to a 4KB boundary. */ blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); @@ -90,14 +98,30 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) if (size > (size_t)conn->page_size && (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) != 0) - WT_RET_MSG(session, ret, "posix_madvise will need"); + WT_RET_MSG(session, ret, + "%s: posix_madvise: POSIX_MADV_WILLNEED", fh->name); + return (0); +} +#endif + +/* + * __wt_mmap_preload -- + * Cause a section of a memory map to be faulted in. + */ +int +__wt_mmap_preload( + WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + +#ifdef HAVE_POSIX_MADVISE + return (__wt_mmap_preload_madvise(session, fh, p, size)); #else - WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); + return (ENOTSUP); #endif - - return (0); } /* @@ -105,28 +129,30 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) * Discard a chunk of the memory map. 
*/ int -__wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size) +__wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) { + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + #ifdef HAVE_POSIX_MADVISE - /* Linux requires the address be aligned to a 4KB boundary. */ WT_CONNECTION_IMPL *conn; WT_DECL_RET; void *blk; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - conn = S2C(session); + + /* Linux requires the address be aligned to a 4KB boundary. */ blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) != 0) - WT_RET_MSG(session, ret, "posix_madvise don't need"); + WT_RET_MSG(session, ret, + "%s: posix_madvise: POSIX_MADV_DONTNEED", fh->name); #else - WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); #endif - return (0); + return (ENOTSUP); } /* diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index c6c03ca7502..a89cf13a760 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -247,9 +247,24 @@ __posix_handle_advise(WT_SESSION_IMPL *session, #if defined(HAVE_POSIX_FADVISE) WT_DECL_RET; + /* + * Refuse pre-load when direct I/O is configured for the file, the + * kernel cache isn't interesting. + */ + if (advice == POSIX_MADV_WILLNEED && fh->direct_io) + return (ENOTSUP); + WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); if (ret == 0) return (0); + + /* + * Treat EINVAL as not-supported, some systems don't support some flags. + * Quietly fail, callers expect not-supported failures. + */ + if (ret == EINVAL) + return (ENOTSUP); + WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name); #else WT_UNUSED(session); @@ -257,7 +272,9 @@ __posix_handle_advise(WT_SESSION_IMPL *session, WT_UNUSED(offset); WT_UNUSED(len); WT_UNUSED(advice); - return (0); + + /* Quietly fail, callers expect not-supported failures. 
*/ + return (ENOTSUP); #endif } diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index 74fb3c4ecb4..b56929ce0e5 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -59,13 +59,15 @@ __wt_mmap(WT_SESSION_IMPL *session, * Cause a section of a memory map to be faulted in. */ int -__wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) +__wt_mmap_preload( + WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) { WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); - return (0); + return (ENOTSUP); } /* @@ -73,12 +75,14 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) * Discard a chunk of the memory map. */ int -__wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size) +__wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) { WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); - return (0); + + return (ENOTSUP); } /* diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 88ca187ae4b..3d7a71deb03 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -162,10 +162,14 @@ static int __win_handle_advise(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) { + WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(offset); WT_UNUSED(len); WT_UNUSED(advice); - WT_RET_MSG(session, ENOTSUP, "%s: handle-advise", fh->name); + + /* Quietly fail, callers expect not-supported failures. 
*/ + return (ENOTSUP); } /* -- cgit v1.2.1 From e60c96da5156c228da3fc61c99bb8b21eb5f0489 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 26 Mar 2016 23:11:28 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files ISO C90 forbids mixed declarations and code --- src/include/extern.h | 1 - src/os_posix/os_map.c | 50 ++++++++++++++++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index ca3f19c14f1..1e28221cc15 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -743,7 +743,6 @@ extern int __wt_map_error_rdonly(int error); extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); extern int __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); extern int __wt_mmap_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); -extern int __wt_mmap_preload_madvise( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); extern int __wt_once(void (*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp); diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index bb0757d1595..76cc8a8e986 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -61,11 +61,11 @@ __wt_mmap(WT_SESSION_IMPL *session, #ifdef HAVE_POSIX_MADVISE /* - * __wt_mmap_preload_madvise -- + * __mmap_preload_madvise -- * Cause a section of a memory map to be faulted in. 
*/ -int -__wt_mmap_preload_madvise( +static int +__mmap_preload_madvise( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) { WT_BM *bm; @@ -97,10 +97,10 @@ __wt_mmap_preload_madvise( size &= ~(size_t)(conn->page_size - 1); if (size > (size_t)conn->page_size && - (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) != 0) - WT_RET_MSG(session, ret, - "%s: posix_madvise: POSIX_MADV_WILLNEED", fh->name); - return (0); + (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0) + return (0); + WT_RET_MSG(session, ret, + "%s: posix_madvise: POSIX_MADV_WILLNEED", fh->name); } #endif @@ -115,7 +115,7 @@ __wt_mmap_preload( WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); #ifdef HAVE_POSIX_MADVISE - return (__wt_mmap_preload_madvise(session, fh, p, size)); + return (__mmap_preload_madvise(session, fh, p, size)); #else WT_UNUSED(fh); WT_UNUSED(p); @@ -124,16 +124,15 @@ __wt_mmap_preload( #endif } +#ifdef HAVE_POSIX_MADVISE /* - * __wt_mmap_discard -- + * __mmap_discard_madvise -- * Discard a chunk of the memory map. */ -int -__wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) +static int +__mmap_discard_madvise( + WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) { - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - -#ifdef HAVE_POSIX_MADVISE WT_CONNECTION_IMPL *conn; WT_DECL_RET; void *blk; @@ -144,15 +143,30 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); - if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) != 0) - WT_RET_MSG(session, ret, - "%s: posix_madvise: POSIX_MADV_DONTNEED", fh->name); + if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) == 0) + return (0); + WT_RET_MSG(session, ret, + "%s: posix_madvise: POSIX_MADV_DONTNEED", fh->name); +} +#endif + +/* + * __wt_mmap_discard -- + * Discard a chunk of the memory map. 
+ */ +int +__wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + +#ifdef HAVE_POSIX_MADVISE + return (__mmap_discard_madvise(session, fh, p, size)); #else WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); -#endif return (ENOTSUP); +#endif } /* -- cgit v1.2.1 From 02029a8fb2823104840ae8ab3b970d104983c535 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 27 Mar 2016 09:48:45 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files I flipped the sense of a test in the posix_madvise code, and got it wrong, fix it. --- src/os_posix/os_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 76cc8a8e986..4f5b4acdd5f 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -96,7 +96,7 @@ __mmap_preload_madvise( */ size &= ~(size_t)(conn->page_size - 1); - if (size > (size_t)conn->page_size && + if (size <= (size_t)conn->page_size || (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0) return (0); WT_RET_MSG(session, ret, -- cgit v1.2.1 From 5ea3ffb90fbd27f65e632e8f496f1da569b06434 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 27 Mar 2016 10:14:52 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Move the Windows mapping handle from the block-manager handle into the WT_FH handle, private to Windows. 
--- src/block/block_map.c | 11 ++++------- src/block/block_mgr.c | 8 ++++---- src/include/block.h | 1 - src/include/extern.h | 8 ++++---- src/include/os.h | 1 + src/os_posix/os_map.c | 10 ++-------- src/os_win/os_map.c | 24 ++++++++++-------------- 7 files changed, 25 insertions(+), 38 deletions(-) diff --git a/src/block/block_map.c b/src/block/block_map.c index c21ca9cfa19..37895f9892b 100644 --- a/src/block/block_map.c +++ b/src/block/block_map.c @@ -14,8 +14,7 @@ */ int __wt_block_map( - WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, - void **mappingcookie) + WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp) { *(void **)mapp = NULL; *maplenp = 0; @@ -27,7 +26,6 @@ __wt_block_map( */ WT_UNUSED(session); WT_UNUSED(block); - WT_UNUSED(mappingcookie); #else /* Map support is configurable. */ if (!S2C(session)->mmap) @@ -52,7 +50,7 @@ __wt_block_map( * Map the file into memory. * Ignore errors, we'll read the file through the cache if map fails. */ - (void)__wt_mmap(session, block->fh, mapp, maplenp, mappingcookie); + (void)__wt_mmap(session, block->fh, mapp, maplenp); #endif return (0); @@ -64,9 +62,8 @@ __wt_block_map( */ int __wt_block_unmap( - WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen, - void **mappingcookie) + WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen) { /* Unmap the file from memory. */ - return (__wt_munmap(session, block->fh, map, maplen, mappingcookie)); + return (__wt_munmap(session, block->fh, map, maplen)); } diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index e16b8709d7d..f842fd4d98e 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -102,8 +102,8 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session, * Read-only objects are optionally mapped into memory instead * of being read into cache buffers. 
*/ - WT_RET(__wt_block_map(session, - bm->block, &bm->map, &bm->maplen, &bm->mappingcookie)); + WT_RET(__wt_block_map( + session, bm->block, &bm->map, &bm->maplen)); /* * If this handle is for a checkpoint, that is, read-only, there @@ -148,8 +148,8 @@ __bm_checkpoint_unload(WT_BM *bm, WT_SESSION_IMPL *session) /* Unmap any mapped segment. */ if (bm->map != NULL) - WT_TRET(__wt_block_unmap(session, - bm->block, bm->map, bm->maplen, &bm->mappingcookie)); + WT_TRET(__wt_block_unmap( + session, bm->block, bm->map, bm->maplen)); /* Unload the checkpoint. */ WT_TRET(__wt_block_checkpoint_unload(session, bm->block, !bm->is_live)); diff --git a/src/include/block.h b/src/include/block.h index 76891e1e5f6..ebdaa91d33e 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -198,7 +198,6 @@ struct __wt_bm { void *map; /* Mapped region */ size_t maplen; - void *mappingcookie; /* * There's only a single block manager handle that can be written, all diff --git a/src/include/extern.h b/src/include/extern.h index 1e28221cc15..f1fbdb4d30a 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -41,8 +41,8 @@ extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, W extern int __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el); extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size); extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); -extern int __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, void **mappingcookie); -extern int __wt_block_unmap( WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen, void **mappingcookie); +extern int __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp); +extern int __wt_block_unmap( WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen); extern int 
__wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp); extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename); extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize); @@ -740,10 +740,10 @@ extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); extern int __wt_map_error_rdonly(int error); -extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); +extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp); extern int __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); extern int __wt_mmap_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); -extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); +extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len); extern int __wt_once(void (*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp); extern int __wt_os_cleanup(WT_SESSION_IMPL *session); diff --git a/src/include/os.h b/src/include/os.h index 2ff78b2777f..9a445b3bcd9 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -81,6 +81,7 @@ struct __wt_fh { HANDLE filehandle; /* Windows file handle */ HANDLE filehandle_secondary; /* Windows file handle for file size changes */ + HANDLE maphandle; /* Windows map object */ #else int fd; /* POSIX file handle */ #endif diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 4f5b4acdd5f..278b6380080 100644 --- a/src/os_posix/os_map.c +++ 
b/src/os_posix/os_map.c @@ -13,15 +13,12 @@ * Map a file into memory. */ int -__wt_mmap(WT_SESSION_IMPL *session, - WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) +__wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) { size_t len; void *map; wt_off_t file_size; - WT_UNUSED(mappingcookie); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); /* @@ -174,11 +171,8 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) * Remove a memory mapping. */ int -__wt_munmap(WT_SESSION_IMPL *session, - WT_FH *fh, void *map, size_t len, void **mappingcookie) +__wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) { - WT_UNUSED(mappingcookie); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index b56929ce0e5..7955abafca1 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -13,8 +13,7 @@ * Map a file into memory. 
*/ int -__wt_mmap(WT_SESSION_IMPL *session, - WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) +__wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) { size_t len; void *map; @@ -28,18 +27,18 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_RET(__wt_filesize(session, fh, &file_size)); len = (size_t)file_size; - *mappingcookie = + fh->maphandle = CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); - if (*mappingcookie == NULL) + if (fh->maphandle == NULL) WT_RET_MSG(session, __wt_win32_errno(), "%s CreateFileMapping error: failed to map %" WT_SIZET_FMT " bytes", fh->name, len); if ((map = MapViewOfFile( - *mappingcookie, FILE_MAP_READ, 0, 0, len)) == NULL) { - CloseHandle(*mappingcookie); - *mappingcookie = NULL; + fh->maphandle, FILE_MAP_READ, 0, 0, len)) == NULL) { + (void)CloseHandle(fh->maphandle); + fh->maphandle = INVALID_HANDLE_VALUE; WT_RET_MSG(session, __wt_win32_errno(), "%s map error: failed to map %" WT_SIZET_FMT " bytes", @@ -90,26 +89,23 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) * Remove a memory mapping. 
*/ int -__wt_munmap(WT_SESSION_IMPL *session, - WT_FH *fh, void *map, size_t len, void **mappingcookie) +__wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) { WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: UnmapViewOfFile %p: %" WT_SIZET_FMT " bytes", fh->name, map, len)); - if (UnmapViewOfFile(map) == 0) { + if (UnmapViewOfFile(map) == 0) WT_RET_MSG(session, __wt_win32_errno(), "%s UnmapViewOfFile error: failed to unmap %" WT_SIZET_FMT " bytes", fh->name, len); - } - if (CloseHandle(*mappingcookie) == 0) { + if (CloseHandle(fh->maphandle) == 0) WT_RET_MSG(session, __wt_win32_errno(), "CloseHandle: MapViewOfFile: %s", fh->name); - } - *mappingcookie = 0; + fh->maphandle = INVALID_HANDLE_VALUE; return (0); } -- cgit v1.2.1 From 69b26f1f8338eee18fe0abdba43264e4cbfd443f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 27 Mar 2016 10:50:14 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Remove HAVE_SYNC_FILE_RANGE #ifdef from the block manager and support ENOTSUP returns up the stack from any underlying asynchronous flush call. General reworking of block manager's sync code to ignore further calls whenever ENOTSUP is returned on a handle. 
--- src/block/block_mgr.c | 14 +++++++++++++- src/block/block_open.c | 3 ++- src/block/block_write.c | 11 ++++++++--- src/include/block.h | 3 ++- src/os_posix/os_inmemory.c | 8 ++++++-- src/os_posix/os_posix.c | 6 +++++- src/os_win/os_win.c | 9 +++++++-- 7 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index f842fd4d98e..f20bb991ff7 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -413,7 +413,19 @@ __bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats) static int __bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool block) { - return (__wt_fsync(session, bm->block->fh, block)); + WT_DECL_RET; + + if (!block && !bm->block->nowait_sync_available) + return (0); + + if ((ret = __wt_fsync(session, bm->block->fh, block)) == 0) + return (0); + + /* Ignore ENOTSUP, but don't try again. */ + if (ret != ENOTSUP) + return (ret); + bm->block->nowait_sync_available = false; + return (0); } /* diff --git a/src/block/block_open.c b/src/block/block_open.c index 0fef6ad0e66..777fb1e8cd9 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -210,7 +210,8 @@ __wt_block_open(WT_SESSION_IMPL *session, /* Set the file extension information. */ block->extend_len = conn->data_extend_len; - /* Set the preload availability. */ + /* Set the asynchronous flush, preload availability. */ + block->nowait_sync_available = true; block->preload_available = true; /* Initialize the live checkpoint's lock. */ diff --git a/src/block/block_write.c b/src/block/block_write.c index d6599d81a8e..771b0d34193 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -337,7 +337,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_RET(ret); } -#ifdef HAVE_SYNC_FILE_RANGE /* * Optionally schedule writes for dirty pages in the system buffer * cache, but only if the current session can wait. 
@@ -346,9 +345,15 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, (block->os_cache_dirty += align_size) > block->os_cache_dirty_max && __wt_session_can_wait(session)) { block->os_cache_dirty = 0; - WT_RET(__wt_fsync(session, fh, false)); + if ((ret = __wt_fsync(session, fh, false)) != 0) { + /* + * Ignore ENOTSUP, but don't try again. + */ + if (ret != ENOTSUP) + return (ret); + block->os_cache_dirty_max = 0; + } } -#endif /* Optionally discard blocks from the buffer cache. */ WT_RET(__wt_block_discard(session, block, align_size)); diff --git a/src/include/block.h b/src/include/block.h index ebdaa91d33e..d1ea04572e7 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -223,7 +223,8 @@ struct __wt_block { wt_off_t size; /* File size */ wt_off_t extend_size; /* File extended size */ wt_off_t extend_len; /* File extend chunk size */ - bool preload_available; /* File pages can be preloaded. */ + bool nowait_sync_available; /* File can flush asynchronously */ + bool preload_available; /* File pages can be preloaded */ /* Configuration information, set when the file is opened. */ uint32_t allocfirst; /* Allocation is first-fit */ diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c index 4d5b35e1499..e79054e56ed 100644 --- a/src/os_posix/os_inmemory.c +++ b/src/os_posix/os_inmemory.c @@ -320,8 +320,12 @@ __im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { WT_UNUSED(session); WT_UNUSED(fh); - WT_UNUSED(block); - return (0); + + /* + * Callers attempting asynchronous flush handle ENOTSUP returns, and + * won't make further attempts. + */ + return (block ? 
0 : ENOTSUP); } /* diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index a89cf13a760..ccae0554a92 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -27,8 +27,12 @@ __posix_sync(WT_SESSION_IMPL *session, WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func); } #else + /* + * Callers attempting asynchronous flush handle ENOTSUP returns, and + * won't make further attempts. + */ if (!block) - return (0); + return (ENOTSUP); #endif #if defined(F_FULLFSYNC) diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 3d7a71deb03..647f795aa36 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -376,9 +376,14 @@ __win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { WT_DECL_RET; - WT_UNUSED(block); - if (fh->fp == NULL) { + /* + * Callers attempting asynchronous flush handle ENOTSUP returns, + * and won't make further attempts. + */ + if (!block) + return (ENOTSUP); + if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) WT_RET_MSG(session, __wt_win32_errno(), "%s handle-sync: FlushFileBuffers error", fh->name); -- cgit v1.2.1 From 45aa4385e43957970bfb8f768c63bf05afb5d5f8 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 27 Mar 2016 12:47:02 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files The wt utility isn't syncing the enclosing directory on a backup file which leaves us potentially vulnerable on a crash on Linux. WiredTiger knows how to do the magic, so use a WiredTiger function to do the backup file copy. This also gets rid of some Windows-specific code in the wt utility. 
--- src/include/extern.h | 1 + src/support/filename.c | 66 ++++++++++++++++++++++++++++ src/utilities/util_backup.c | 102 +++++++++++--------------------------------- 3 files changed, 91 insertions(+), 78 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index f1fbdb4d30a..27e2ccb22de 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -609,6 +609,7 @@ extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t na extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); +extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to); extern int __wt_library_init(void); extern int __wt_breakpoint(void); extern void __wt_attach(WT_SESSION_IMPL *session); diff --git a/src/support/filename.c b/src/support/filename.c index ac0aee5686e..b4858d2e982 100644 --- a/src/support/filename.c +++ b/src/support/filename.c @@ -123,3 +123,69 @@ __wt_sync_handle_and_rename( return (__wt_rename_and_sync_directory(session, from, to)); } + +/* + * __wt_copy_and_sync -- + * Copy a file safely; here to support the wt utility. + */ +int +__wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) +{ + WT_DECL_ITEM(tmp); + WT_DECL_RET; + WT_FH *ffh, *tfh; + WT_SESSION_IMPL *session; + size_t n; + wt_off_t offset, size; + char *buf; + + session = (WT_SESSION_IMPL *)wt_session; + ffh = tfh = NULL; + buf = NULL; + + /* + * Remove the target file if it exists, then create a temporary file, + * copy the original into it and rename it into place. I don't think + * its necessary to remove the file, or create a copy and do a rename, + * it's likely safe to overwrite the backup file directly. 
I'm doing + * the remove and rename to insulate us from errors in other programs + * that might not detect a corrupted backup file; it's cheap insurance + * in a path where undetected failure is very bad. + */ + WT_RET(__wt_remove_if_exists(session, to)); + + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to)); + + /* Open the from and temporary file handles. */ + WT_ERR(__wt_open(session, from, + WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &ffh)); + WT_ERR(__wt_open(session, tmp->data, + WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh)); + + /* + * Allocate a copy buffer. Don't use a scratch buffer, this thing is + * big, and we don't want it hanging around. + */ +#define WT_BACKUP_COPY_SIZE (128 * 1024) + WT_ERR(__wt_malloc(session, WT_BACKUP_COPY_SIZE, &buf)); + + /* Get the file's size, then copy the bytes. */ + WT_ERR(__wt_filesize(session, ffh, &size)); + for (offset = 0; size > 0; size -= n, offset += n) { + n = (size_t)WT_MIN(size, WT_BACKUP_COPY_SIZE); + WT_ERR(__wt_read(session, ffh, offset, n, buf)); + WT_ERR(__wt_write(session, tfh, offset, n, buf)); + } + + /* Close the from handle, then swap the temporary file into place. */ + WT_ERR(__wt_close(session, &ffh)); + ret = __wt_sync_handle_and_rename(session, &tfh, tmp->data, to); + +err: WT_TRET(__wt_close(session, &ffh)); + WT_TRET(__wt_close(session, &tfh)); + + __wt_free(session, buf); + __wt_scr_free(session, &tmp); + return (ret); +} diff --git a/src/utilities/util_backup.c b/src/utilities/util_backup.c index b3afc78e9e8..55c0e336111 100644 --- a/src/utilities/util_backup.c +++ b/src/utilities/util_backup.c @@ -8,12 +8,9 @@ #include "util.h" -static int copy(const char *, const char *); +static int copy(WT_SESSION *, const char *, const char *); static int usage(void); -#define CBUF_LEN (128 * 1024) /* Copy buffer and size. */ -static char *cbuf; - /* * append_target -- * Build a list of comma-separated targets. 
@@ -86,7 +83,7 @@ util_backup(WT_SESSION *session, int argc, char *argv[]) while ( (ret = cursor->next(cursor)) == 0 && (ret = cursor->get_key(cursor, &name)) == 0) - if ((ret = copy(name, directory)) != 0) + if ((ret = copy(session, name, directory)) != 0) goto err; if (ret == WT_NOTFOUND) ret = 0; @@ -98,97 +95,46 @@ util_backup(WT_SESSION *session, int argc, char *argv[]) } err: free(config); - free(cbuf); - return (ret); } static int -copy(const char *name, const char *directory) +copy(WT_SESSION *session, const char *name, const char *directory) { WT_DECL_RET; - ssize_t n; - int ifd, ofd; + size_t len; + char *from, *to; - ret = 1; - ifd = ofd = -1; + from = to = NULL; - if (verbose && - printf("Backing up %s/%s to %s\n", home, name, directory) < 0) { - fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - return (1); - } - - /* Allocate a large copy buffer (use it to build pathnames as well. */ - if (cbuf == NULL && (cbuf = malloc(CBUF_LEN)) == NULL) + /* Build the 2 pathnames we need. */ + len = strlen(home) + strlen(name) + 2; + if ((from = malloc(len)) == NULL) goto memerr; - - /* Open the read file. */ - if (snprintf(cbuf, CBUF_LEN, "%s/%s", home, name) >= CBUF_LEN) + (void)snprintf(from, len, "%s/%s", home, name); + len = strlen(directory) + strlen(name) + 2; + if ((to = malloc(len)) == NULL) goto memerr; - if ((ifd = open(cbuf, O_BINARY | O_RDONLY, 0)) < 0) - goto readerr; + (void)snprintf(to, len, "%s/%s", directory, name); - /* Open the write file. */ - if (snprintf(cbuf, CBUF_LEN, "%s/%s", directory, name) >= CBUF_LEN) - goto memerr; - if ((ofd = open( - cbuf, O_BINARY | O_CREAT | O_WRONLY | O_TRUNC, 0666)) < 0) - goto writerr; - - /* Copy the file. */ - while ((n = read(ifd, cbuf, CBUF_LEN)) > 0) - if (write(ofd, cbuf, (size_t)n) != n) - goto writerr; - if (n != 0) - goto readerr; - - /* - * Close file descriptors (forcing a flush on the write side), and - * check for any errors. 
- */ - ret = close(ifd); - ifd = -1; - if (ret != 0) - goto readerr; + if (verbose && printf("Backing up %s to %s\n", from, to) < 0) { + fprintf(stderr, "%s: %s\n", progname, strerror(EIO)); + goto err; + } /* - * We need to know this file was successfully written, it's a backup. + * Use WiredTiger to copy the file: ensuring stability of the copied + * file on disk requires care, and WiredTiger knows how to do it. */ -#ifdef _WIN32 - if (FlushFileBuffers((HANDLE)_get_osfhandle(ofd)) == 0) { - DWORD err = GetLastError(); - ret = err; - goto writerr; - } -#else - if (fsync(ofd)) - goto writerr; -#endif - ret = close(ofd); - ofd = -1; - if (ret != 0) - goto writerr; - - /* Success. */ - ret = 0; + if ((ret = __wt_copy_and_sync(session, from, to)) != 0) + fprintf(stderr, "%s to %s: backup copy: %s\n", + from, to, session->strerror(session, ret)); - if (0) { -readerr: fprintf(stderr, - "%s: %s/%s: %s\n", progname, home, name, strerror(errno)); - } - if (0) { -writerr: fprintf(stderr, "%s: %s/%s: %s\n", - progname, directory, name, strerror(errno)); - } if (0) { memerr: fprintf(stderr, "%s: %s\n", progname, strerror(errno)); } - - if (ifd >= 0) - (void)close(ifd); - if (ofd >= 0) - (void)close(ofd); +err: free(from); + free(to); return (ret); } -- cgit v1.2.1 From d392894ae515628965924dfd1fdcfe276169c2fd Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 27 Mar 2016 12:58:38 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files We should no longer need to #ifdef __wt_posix_fadvise, all of its callers now handle ENOTSUP returns, so we won't repeatedly call it to no purpose. --- src/include/misc.i | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/include/misc.i b/src/include/misc.i index 373a74272d0..5d9e5943254 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -65,8 +65,8 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...) 
return (ret); #else WT_UNUSED(session); - WT_UNUSED(fmt); WT_UNUSED(flag); + WT_UNUSED(fmt); return (0); #endif } @@ -187,19 +187,10 @@ static inline int __wt_posix_fadvise(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) { -#if defined(HAVE_POSIX_FADVISE) WT_RET(__wt_verbose( session, WT_VERB_HANDLEOPS, "%s: handle-advise", fh->name)); return (fh->fh_advise(session, fh, offset, len, advice)); -#else - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - return (0); -#endif } /* -- cgit v1.2.1 From a734a74edbac74be07721dce442f29f2464b3290 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 27 Mar 2016 15:11:09 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Make all of the memory-map error/verbose messages standard. Don't compare the return of CreateFileMappingA against NULL. Even if UnmapViewOfFile() fails, still close the file handle. Save the error value before calling another MSVC function that might clear it. 
--- dist/s_string.ok | 1 + src/os_posix/os_map.c | 29 ++++++++++++++-------------- src/os_win/os_map.c | 53 +++++++++++++++++++++++++++------------------------ 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index ba826339b9f..018654d46bc 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -69,6 +69,7 @@ Config Coverity CreateFileA CreateFileMapping +CreateFileMappingA Crummey CustomersPhone DECL diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 278b6380080..ee72cad0baa 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -16,8 +16,8 @@ int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) { size_t len; - void *map; wt_off_t file_size; + void *map; WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); @@ -37,19 +37,18 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) WT_RET(__wt_filesize(session, fh, &file_size)); len = (size_t)file_size; + (void)__wt_verbose(session, WT_VERB_FILEOPS, + "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len); + if ((map = mmap(NULL, len, PROT_READ, #ifdef MAP_NOCORE MAP_NOCORE | #endif MAP_PRIVATE, - fh->fd, (wt_off_t)0)) == MAP_FAILED) { - WT_RET_MSG(session, __wt_errno(), - "%s map error: failed to map %" WT_SIZET_FMT " bytes", - fh->name, len); - } - (void)__wt_verbose(session, WT_VERB_FILEOPS, - "%s: map %p: %" WT_SIZET_FMT " bytes", fh->name, map, len); + fh->fd, (wt_off_t)0)) == MAP_FAILED) + WT_RET_MSG(session, + __wt_errno(), "%s: memory-map: mmap", fh->name); *(void **)mapp = map; *lenp = len; @@ -97,7 +96,8 @@ __mmap_preload_madvise( (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0) return (0); WT_RET_MSG(session, ret, - "%s: posix_madvise: POSIX_MADV_WILLNEED", fh->name); + "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED", + fh->name); } #endif @@ -143,7 +143,8 @@ __mmap_discard_madvise( if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) == 0) return 
(0); WT_RET_MSG(session, ret, - "%s: posix_madvise: POSIX_MADV_DONTNEED", fh->name); + "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED", + fh->name); } #endif @@ -175,13 +176,11 @@ __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: unmap %p: %" WT_SIZET_FMT " bytes", fh->name, map, len)); + (void)__wt_verbose(session, WT_VERB_FILEOPS, + "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); if (munmap(map, len) == 0) return (0); - WT_RET_MSG(session, __wt_errno(), - "%s unmap error: failed to unmap %" WT_SIZET_FMT " bytes", - fh->name, len); + WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name); } diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index 7955abafca1..38180efff37 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -15,9 +15,10 @@ int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) { + WT_DECL_RET; size_t len; - void *map; wt_off_t file_size; + void *map; /* * There's no locking here to prevent the underlying file from changing @@ -27,26 +28,25 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) WT_RET(__wt_filesize(session, fh, &file_size)); len = (size_t)file_size; + (void)__wt_verbose(session, WT_VERB_FILEOPS, + "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len); + fh->maphandle = CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); - if (fh->maphandle == NULL) + if (fh->maphandle == INVALID_HANDLE_VALUE) WT_RET_MSG(session, __wt_win32_errno(), - "%s CreateFileMapping error: failed to map %" - WT_SIZET_FMT " bytes", - fh->name, len); + "%s: memory-map: CreateFileMappingA", fh->name); + + if ((map = + MapViewOfFile(fh->maphandle, FILE_MAP_READ, 0, 0, len)) == NULL) { + ret = __wt_win32_errno(); - if ((map = MapViewOfFile( - fh->maphandle, FILE_MAP_READ, 0, 0, len)) == NULL) { 
(void)CloseHandle(fh->maphandle); fh->maphandle = INVALID_HANDLE_VALUE; - WT_RET_MSG(session, __wt_win32_errno(), - "%s map error: failed to map %" WT_SIZET_FMT " bytes", - fh->name, len); + WT_RET_MSG(session, ret, + "%s: memory-map: MapViewOfFile", fh->name); } - (void)__wt_verbose(session, WT_VERB_FILEOPS, - "%s: MapViewOfFile %p: %" WT_SIZET_FMT " bytes", - fh->name, map, len); *(void **)mapp = map; *lenp = len; @@ -91,21 +91,24 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) { - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: UnmapViewOfFile %p: %" WT_SIZET_FMT " bytes", - fh->name, map, len)); + WT_DECL_RET; - if (UnmapViewOfFile(map) == 0) - WT_RET_MSG(session, __wt_win32_errno(), - "%s UnmapViewOfFile error: failed to unmap %" WT_SIZET_FMT - " bytes", - fh->name, len); + (void)__wt_verbose(session, WT_VERB_FILEOPS, + "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); - if (CloseHandle(fh->maphandle) == 0) - WT_RET_MSG(session, __wt_win32_errno(), - "CloseHandle: MapViewOfFile: %s", fh->name); + if (UnmapViewOfFile(map) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: memory-unmap: UnmapViewOfFile", fh->name); + } + + if (CloseHandle(fh->maphandle) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: memory-unmap: CloseHandle", fh->name); + } fh->maphandle = INVALID_HANDLE_VALUE; - return (0); + return (ret); } -- cgit v1.2.1 From c021fb9c313305ac5914ebec81d5daeb2a796e1b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 27 Mar 2016 15:27:02 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Don't open a secondary file handle for read-only files: it's not needed (the secondary handle is only used for file truncation), and second, the attempt will fail. 
--- src/os_win/os_win.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 647f795aa36..3f2806c4d9d 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -407,6 +407,10 @@ __win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) largeint.QuadPart = len; + if (fh->filehandle_secondary == INVALID_HANDLE_VALUE) + WT_RET_MSG(session, EINVAL, + "%s: handle-truncate: read-only", fh->name); + if (SetFilePointerEx( fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) WT_RET_MSG(session, __wt_win32_errno(), @@ -570,11 +574,14 @@ __win_handle_open(WT_SESSION_IMPL *session, * concurrently with reads on the file. Writes would also move the file * pointer. */ - filehandle_secondary = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, - share_mode, NULL, OPEN_EXISTING, f, NULL); - if (filehandle_secondary == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_win32_errno(), - "%s: handle-open: CreateFileA: secondary", name); + if (!LF_ISSET(WT_OPEN_READONLY)) { + filehandle_secondary = CreateFileA(name, + GENERIC_READ | GENERIC_WRITE, + share_mode, NULL, OPEN_EXISTING, f, NULL); + if (filehandle_secondary == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_win32_errno(), + "%s: handle-open: CreateFileA: secondary", name); + } /* Optionally configure a stdio stream API. */ switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { -- cgit v1.2.1 From b5338a1fab247e781ca5491ac2abc56317108c84 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 28 Mar 2016 07:15:09 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Group all WT_BLOCK configuration together. 
--- src/block/block_open.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/block/block_open.c b/src/block/block_open.c index 777fb1e8cd9..fe248284758 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -201,12 +201,6 @@ __wt_block_open(WT_SESSION_IMPL *session, WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval)); block->os_cache_dirty_max = (size_t)cval.val; - /* Open the underlying file handle. */ - WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh)); - - /* Set the file's size. */ - WT_ERR(__wt_filesize(session, block->fh, &block->size)); - /* Set the file extension information. */ block->extend_len = conn->data_extend_len; @@ -214,6 +208,12 @@ __wt_block_open(WT_SESSION_IMPL *session, block->nowait_sync_available = true; block->preload_available = true; + /* Open the underlying file handle. */ + WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh)); + + /* Set the file's size. */ + WT_ERR(__wt_filesize(session, block->fh, &block->size)); + /* Initialize the live checkpoint's lock. */ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager")); -- cgit v1.2.1 From 67142d643a2dd632ee1326c3c1924f984f548998 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 28 Mar 2016 07:47:26 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files When running in-memory, don't read/write the file descriptor's structure, at 4KB each, a large number of collections can take up significant amounts of memory. --- src/block/block_open.c | 14 +++++++++++--- src/block/block_slvg.c | 2 +- src/include/extern.h | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/block/block_open.c b/src/block/block_open.c index fe248284758..f4da5ca7c05 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -67,7 +67,7 @@ __wt_block_manager_create( } /* Write out the file's meta-data. 
*/ - ret = __wt_desc_init(session, fh, allocsize); + ret = __wt_desc_write(session, fh, allocsize); /* * Ensure the truncated file has made it to disk, then the upper-level @@ -266,16 +266,20 @@ __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block) } /* - * __wt_desc_init -- + * __wt_desc_write -- * Write a file's initial descriptor structure. */ int -__wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) +__wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) { WT_BLOCK_DESC *desc; WT_DECL_ITEM(buf); WT_DECL_RET; + /* If in-memory, we don't read or write the descriptor structure. */ + if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) + return (0); + /* Use a scratch buffer to get correct alignment for direct I/O. */ WT_RET(__wt_scr_alloc(session, allocsize, &buf)); memset(buf->mem, 0, allocsize); @@ -313,6 +317,10 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_DECL_RET; uint32_t cksum_calculate, cksum_tmp; + /* If in-memory, we don't read or write the descriptor structure. */ + if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) + return (0); + /* Use a scratch buffer to get correct alignment for direct I/O. */ WT_RET(__wt_scr_alloc(session, block->allocsize, &buf)); diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c index a8fd23f25c0..6be3fa73f70 100644 --- a/src/block/block_slvg.c +++ b/src/block/block_slvg.c @@ -21,7 +21,7 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) allocsize = block->allocsize; /* Reset the description information in the first block. 
*/ - WT_RET(__wt_desc_init(session, block->fh, allocsize)); + WT_RET(__wt_desc_write(session, block->fh, allocsize)); /* * Salvage creates a new checkpoint when it's finished, set up for diff --git a/src/include/extern.h b/src/include/extern.h index f1fbdb4d30a..b5160263405 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -49,7 +49,7 @@ extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *file extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp); extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); -extern int __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); +extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep); extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep); -- cgit v1.2.1 From 0ec7be4e7843a5acc036e4178b25659037471bfa Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 28 Mar 2016 09:40:53 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Move the full open path creation out of the platform-specific code into the top-level generic open code, it's simpler that way and the platforms don't need the details. Minor simplification of the read-only code and asserts. 
--- src/os_posix/os_open.c | 31 ++++++++++++++++++------------- src/os_posix/os_posix.c | 11 ----------- src/os_win/os_win.c | 10 ---------- 3 files changed, 18 insertions(+), 34 deletions(-) diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index de27ab83667..a18ea658fde 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -163,14 +163,15 @@ __wt_open(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *fh; - bool open_called; + bool lock_file, open_called; + const char *path; WT_ASSERT(session, file_type != 0); /* A file type is required. */ conn = S2C(session); - fh = NULL; open_called = false; + path = name; WT_RET(__open_verbose(session, name, file_type, flags)); @@ -196,21 +197,22 @@ __wt_open(WT_SESSION_IMPL *session, /* * If this is a read-only connection, open all files read-only except * the lock file. - */ - if (F_ISSET(conn, WT_CONN_READONLY) && - !WT_STRING_MATCH(name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))) - LF_SET(WT_OPEN_READONLY); - - /* + * * The only file created in read-only mode is the lock file. */ - WT_ASSERT(session, - !LF_ISSET(WT_OPEN_CREATE) || - !F_ISSET(conn, WT_CONN_READONLY) || - WT_STRING_MATCH(name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); + if (F_ISSET(conn, WT_CONN_READONLY)) { + lock_file = strcmp(name, WT_SINGLETHREAD) == 0; + if (!lock_file) + LF_SET(WT_OPEN_READONLY); + WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE)); + } + + /* Create the path to the file. */ + if (!LF_ISSET(WT_OPEN_FIXED)) + WT_ERR(__wt_filename(session, name, &path)); /* Call the underlying open function. 
*/ - WT_ERR(conn->handle_open(session, fh, name, file_type, flags)); + WT_ERR(conn->handle_open(session, fh, path, file_type, flags)); open_called = true; /* @@ -225,6 +227,9 @@ err: if (open_called) __wt_free(session, fh); } } + + if (path != name) + __wt_free(session, path); return (ret); } diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index ccae0554a92..b3ebb851506 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -533,23 +533,15 @@ __posix_handle_open(WT_SESSION_IMPL *session, mode_t mode; int f, fd, tret; bool direct_io; - char *path; const char *stream_mode; conn = S2C(session); direct_io = false; - path = NULL; /* Set up error handling. */ fh->fd = fd = -1; fh->fp = NULL; - /* Create the path to the file. */ - if (!LF_ISSET(WT_OPEN_FIXED)) { - WT_ERR(__wt_filename(session, name, &path)); - name = path; - } - if (file_type == WT_FILE_TYPE_DIRECTORY) { f = O_RDONLY; #ifdef O_CLOEXEC @@ -660,7 +652,6 @@ __posix_handle_open(WT_SESSION_IMPL *session, "%s: handle-open: fdopen", name); directory_open: - __wt_free(session, path); fh->fd = fd; /* Configure fallocate calls. */ @@ -685,8 +676,6 @@ err: if (fd != -1) { if (tret != 0) __wt_err(session, tret, "%s: handle-open: close", name); } - - __wt_free(session, path); return (ret); } diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 3f2806c4d9d..212fcd0238b 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -478,12 +478,10 @@ __win_handle_open(WT_SESSION_IMPL *session, WT_DECL_RET; int f, fd, share_mode; bool direct_io; - char *path; const char *stream_mode; conn = S2C(session); direct_io = false; - path = NULL; /* Set up error handling. */ fh->filehandle = fh->filehandle_secondary = @@ -499,12 +497,6 @@ __win_handle_open(WT_SESSION_IMPL *session, if (file_type == WT_FILE_TYPE_DIRECTORY) goto directory_open; - /* Create the path to the file. 
*/ - if (!LF_ISSET(WT_OPEN_FIXED)) { - WT_ERR(__wt_filename(session, name, &path)); - name = path; - } - share_mode = FILE_SHARE_READ; if (!LF_ISSET(WT_OPEN_READONLY)) share_mode |= FILE_SHARE_WRITE; @@ -617,7 +609,6 @@ __win_handle_open(WT_SESSION_IMPL *session, __win_handle_allocate_configure(session, fh); directory_open: - __wt_free(session, path); fh->filehandle = filehandle; fh->filehandle_secondary = filehandle_secondary; @@ -640,7 +631,6 @@ err: if (filehandle != INVALID_HANDLE_VALUE) if (filehandle_secondary != INVALID_HANDLE_VALUE) (void)CloseHandle(filehandle_secondary); - __wt_free(session, path); return (ret); } -- cgit v1.2.1 From a319e1ec929866012e469d84b5d7066446955bfe Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 28 Mar 2016 10:06:23 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files error: passing argument 3 of '__wt_filename' from incompatible pointer type [-Werror=incompatible-pointer-types] --- src/os_posix/os_open.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index a18ea658fde..9013d67d22b 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -164,14 +164,14 @@ __wt_open(WT_SESSION_IMPL *session, WT_DECL_RET; WT_FH *fh; bool lock_file, open_called; - const char *path; + char *path; WT_ASSERT(session, file_type != 0); /* A file type is required. */ conn = S2C(session); fh = NULL; open_called = false; - path = name; + path = NULL; WT_RET(__open_verbose(session, name, file_type, flags)); @@ -212,7 +212,8 @@ __wt_open(WT_SESSION_IMPL *session, WT_ERR(__wt_filename(session, name, &path)); /* Call the underlying open function. */ - WT_ERR(conn->handle_open(session, fh, path, file_type, flags)); + WT_ERR(conn->handle_open( + session, fh, path == NULL ? 
name : path, file_type, flags)); open_called = true; /* @@ -228,8 +229,7 @@ err: if (open_called) } } - if (path != name) - __wt_free(session, path); + __wt_free(session, path); return (ret); } -- cgit v1.2.1 From 8caf2c83a8a79b6ac6216d2ebebf7a553d57266c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 28 Mar 2016 10:23:04 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Remove the temporary file as well, if it exists. We're opening it exclusively and if there's a leftover file, backup might fail. --- src/support/filename.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/support/filename.c b/src/support/filename.c index b4858d2e982..e8edceaa1e2 100644 --- a/src/support/filename.c +++ b/src/support/filename.c @@ -152,11 +152,12 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) * that might not detect a corrupted backup file; it's cheap insurance * in a path where undetected failure is very bad. */ - WT_RET(__wt_remove_if_exists(session, to)); - WT_ERR(__wt_scr_alloc(session, 0, &tmp)); WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to)); + WT_ERR(__wt_remove_if_exists(session, to)); + WT_ERR(__wt_remove_if_exists(session, tmp->data)); + /* Open the from and temporary file handles. */ WT_ERR(__wt_open(session, from, WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &ffh)); -- cgit v1.2.1 From 2772745499e8ec05fa9c002af954fe679ffa38b1 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 28 Mar 2016 11:12:57 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Reviewer request, add a comment. 
--- src/os_posix/os_posix.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index b3ebb851506..f7b81099fdb 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -582,6 +582,12 @@ __posix_handle_open(WT_SESSION_IMPL *session, f |= O_CLOEXEC; #endif #ifdef O_DIRECT + /* + * Direct I/O: file-type is a flag from the set of possible flags stored + * in the connection handle during configuration, check for a match. + * Also, "direct_io=checkpoint" configures direct I/O for readonly data + * files. + */ if (FLD_ISSET(conn->direct_io, file_type) || (LF_ISSET(WT_OPEN_READONLY) && file_type == WT_FILE_TYPE_DATA && -- cgit v1.2.1 From 7108d61115849c1274c2b940392b0df92a14d3c4 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 28 Mar 2016 16:31:37 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Make the 4 mmap functions methods off the WT_FH handle. --- src/block/block_map.c | 13 ++++++++---- src/block/block_read.c | 6 +++--- src/block/block_write.c | 4 ++-- src/btree/bt_discard.c | 8 ++++--- src/include/extern.h | 12 +++++++---- src/include/misc.i | 14 ------------- src/include/os.h | 4 ++++ src/os_posix/os_map.c | 29 +++++++++++++------------- src/os_posix/os_posix.c | 4 ++++ src/os_posix/os_stdio.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ src/os_win/os_map.c | 16 +++++++------- src/os_win/os_win.c | 4 ++++ 12 files changed, 117 insertions(+), 52 deletions(-) diff --git a/src/block/block_map.c b/src/block/block_map.c index 37895f9892b..8ae6e815798 100644 --- a/src/block/block_map.c +++ b/src/block/block_map.c @@ -16,6 +16,8 @@ int __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp) { + WT_DECL_RET; + *(void **)mapp = NULL; *maplenp = 0; @@ -48,12 +50,15 @@ __wt_block_map( /* * Map the file into memory. - * Ignore errors, we'll read the file through the cache if map fails. 
+ * Ignore not-supported errors, we'll read the file through the cache + * if map fails. */ - (void)__wt_mmap(session, block->fh, mapp, maplenp); + ret = block->fh->fh_map(session, block->fh, mapp, maplenp); + if (ret == ENOTSUP) + ret = 0; #endif - return (0); + return (ret); } /* @@ -65,5 +70,5 @@ __wt_block_unmap( WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen) { /* Unmap the file from memory. */ - return (__wt_munmap(session, block->fh, map, maplen)); + return (block->fh->fh_map_unmap(session, block->fh, map, maplen)); } diff --git a/src/block/block_read.c b/src/block/block_read.c index d0522e155fc..6126a6860c9 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -37,10 +37,10 @@ __wt_bm_preload( mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; if (mapped) - ret = __wt_mmap_preload(session, + ret = block->fh->fh_map_preload(session, block->fh, (uint8_t *)bm->map + offset, size); else - ret = __wt_posix_fadvise(session, + ret = block->fh->fh_advise(session, block->fh, (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED); if (ret == 0) @@ -91,7 +91,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, buf->data = (uint8_t *)bm->map + offset; buf->size = size; if (block->preload_available) - WT_RET(__wt_mmap_preload( + WT_RET(block->fh->fh_map_preload( session, block->fh, buf->data, buf->size)); WT_STAT_FAST_CONN_INCR(session, block_map_read); diff --git a/src/block/block_write.c b/src/block/block_write.c index 771b0d34193..134272b52f9 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -42,8 +42,8 @@ __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) return (0); block->os_cache = 0; - WT_ERR(__wt_posix_fadvise(session, block->fh, - (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)); + WT_ERR(block->fh->fh_advise(session, + block->fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)); return (0); err: /* Ignore ENOTSUP, but don't try again. 
*/ diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index 5983c7e4f18..1181d92609f 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -40,6 +40,7 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) { + WT_FH *fh; WT_PAGE *page; WT_PAGE_HEADER *dsk; WT_PAGE_MODIFY *mod; @@ -133,9 +134,10 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) dsk = (WT_PAGE_HEADER *)page->dsk; if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) __wt_overwrite_and_free_len(session, dsk, dsk->mem_size); - if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) - (void)__wt_mmap_discard( - session, S2BT(session)->bm->block->fh, dsk, dsk->mem_size); + if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) { + fh = S2BT(session)->bm->block->fh; + (void)fh->fh_map_discard(session, fh, dsk, dsk->mem_size); + } __wt_overwrite_and_free(session, page); } diff --git a/src/include/extern.h b/src/include/extern.h index 14c086a1613..48191fd974c 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -741,10 +741,6 @@ extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); extern int __wt_map_error_rdonly(int error); -extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp); -extern int __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); -extern int __wt_mmap_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); -extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len); extern int __wt_once(void (*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp); extern int __wt_os_cleanup(WT_SESSION_IMPL *session); @@ -758,6 +754,10 @@ extern int 
__wt_os_win(WT_SESSION_IMPL *session); extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session); extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); +extern int __wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp); +extern int __wt_posix_map_discard( WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); +extern int __wt_posix_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); +extern int __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len); extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); @@ -766,6 +766,10 @@ extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_ extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); extern int __wt_win32_errno(void); extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); +extern int __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp); +extern int __wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); +extern int __wt_win_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); +extern int __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len); extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); 
extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); diff --git a/src/include/misc.i b/src/include/misc.i index 5d9e5943254..2926ff07e58 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -179,20 +179,6 @@ __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) #endif } -/* - * __wt_posix_fadvise -- - * POSIX fadvise. - */ -static inline int -__wt_posix_fadvise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) -{ - WT_RET(__wt_verbose( - session, WT_VERB_HANDLEOPS, "%s: handle-advise", fh->name)); - - return (fh->fh_advise(session, fh, offset, len, advice)); -} - /* * __wt_fallocate -- * Extend a file. diff --git a/src/include/os.h b/src/include/os.h index 9a445b3bcd9..a3637c43fc3 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -112,6 +112,10 @@ struct __wt_fh { int (*fh_close)(WT_SESSION_IMPL *, WT_FH *); int (*fh_getc)(WT_SESSION_IMPL *, WT_FH *, int *); int (*fh_lock)(WT_SESSION_IMPL *, WT_FH *, bool); + int (*fh_map)(WT_SESSION_IMPL *, WT_FH *, void *, size_t *); + int (*fh_map_discard)(WT_SESSION_IMPL *, WT_FH *, void *, size_t); + int (*fh_map_preload)(WT_SESSION_IMPL *, WT_FH *, const void *, size_t); + int (*fh_map_unmap)(WT_SESSION_IMPL *, WT_FH *, void *, size_t); int (*fh_printf)(WT_SESSION_IMPL *, WT_FH *, const char *, va_list); int (*fh_read)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *); int (*fh_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index ee72cad0baa..8d8168362e2 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -9,11 +9,11 @@ #include "wt_internal.h" /* - * __wt_mmap -- + * __wt_posix_map -- * Map a file into memory. 
*/ int -__wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) +__wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) { size_t len; wt_off_t file_size; @@ -57,11 +57,11 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) #ifdef HAVE_POSIX_MADVISE /* - * __mmap_preload_madvise -- + * __posix_map_preload_madvise -- * Cause a section of a memory map to be faulted in. */ static int -__mmap_preload_madvise( +__posix_map_preload_madvise( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) { WT_BM *bm; @@ -102,17 +102,17 @@ __mmap_preload_madvise( #endif /* - * __wt_mmap_preload -- + * __wt_posix_map_preload -- * Cause a section of a memory map to be faulted in. */ int -__wt_mmap_preload( +__wt_posix_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); #ifdef HAVE_POSIX_MADVISE - return (__mmap_preload_madvise(session, fh, p, size)); + return (__posix_map_preload_madvise(session, fh, p, size)); #else WT_UNUSED(fh); WT_UNUSED(p); @@ -123,11 +123,11 @@ __wt_mmap_preload( #ifdef HAVE_POSIX_MADVISE /* - * __mmap_discard_madvise -- + * __posix_map_discard_madvise -- * Discard a chunk of the memory map. */ static int -__mmap_discard_madvise( +__posix_map_discard_madvise( WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) { WT_CONNECTION_IMPL *conn; @@ -149,16 +149,17 @@ __mmap_discard_madvise( #endif /* - * __wt_mmap_discard -- + * __wt_posix_map_discard -- * Discard a chunk of the memory map. 
*/ int -__wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) +__wt_posix_map_discard( + WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); #ifdef HAVE_POSIX_MADVISE - return (__mmap_discard_madvise(session, fh, p, size)); + return (__posix_map_discard_madvise(session, fh, p, size)); #else WT_UNUSED(fh); WT_UNUSED(p); @@ -168,11 +169,11 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) } /* - * __wt_munmap -- + * __wt_posix_map_unmap -- * Remove a memory mapping. */ int -__wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) +__wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index f7b81099fdb..0fdb0d1fff7 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -668,6 +668,10 @@ directory_open: fh->fh_close = __posix_handle_close; fh->fh_getc = __posix_handle_getc; fh->fh_lock = __posix_handle_lock; + fh->fh_map = __wt_posix_map; + fh->fh_map_discard = __wt_posix_map_discard; + fh->fh_map_preload = __wt_posix_map_preload; + fh->fh_map_unmap = __wt_posix_map_unmap; fh->fh_printf = __posix_handle_printf; fh->fh_read = __posix_handle_read; fh->fh_size = __posix_handle_size; diff --git a/src/os_posix/os_stdio.c b/src/os_posix/os_stdio.c index cd6979f1d20..5e3cd522bd6 100644 --- a/src/os_posix/os_stdio.c +++ b/src/os_posix/os_stdio.c @@ -67,6 +67,57 @@ __stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) WT_RET_MSG(session, ENOTSUP, "%s: handle-lock", fh->name); } +/* + * __stdio_handle_map -- + * Map a file. 
+ */ +static int +__stdio_handle_map(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t *lenp) +{ + WT_UNUSED(p); + WT_UNUSED(lenp); + WT_RET_MSG(session, ENOTSUP, "%s: handle-map", fh->name); +} + +/* + * __stdio_handle_map_discard -- + * Discard a section of a mapped region. + */ +static int +__stdio_handle_map_discard( + WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len) +{ + WT_UNUSED(p); + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-map-discard", fh->name); +} + +/* + * __stdio_handle_map_preload -- + * Preload a section of a mapped region. + */ +static int +__stdio_handle_map_preload( + WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t len) +{ + WT_UNUSED(p); + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-map-preload", fh->name); +} + +/* + * __stdio_handle_map_unmap -- + * Unmap a file. + */ +static int +__stdio_handle_map_unmap( + WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len) +{ + WT_UNUSED(p); + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-map-unmap", fh->name); +} + /* * __stdio_handle_printf -- * ANSI C vfprintf. @@ -159,6 +210,10 @@ __stdio_func_init(WT_FH *fh, const char *name, FILE *fp) fh->fh_close = __stdio_handle_close; fh->fh_getc = __stdio_handle_getc; fh->fh_lock = __stdio_handle_lock; + fh->fh_map = __stdio_handle_map; + fh->fh_map_discard = __stdio_handle_map_discard; + fh->fh_map_preload = __stdio_handle_map_preload; + fh->fh_map_unmap = __stdio_handle_map_unmap; fh->fh_printf = __stdio_handle_printf; fh->fh_read = __stdio_handle_read; fh->fh_size = __stdio_handle_size; diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index 38180efff37..5156310945c 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -9,11 +9,11 @@ #include "wt_internal.h" /* - * __wt_mmap -- + * __wt_win_map -- * Map a file into memory. 
*/ int -__wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) +__wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) { WT_DECL_RET; size_t len; @@ -54,11 +54,11 @@ __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) } /* - * __wt_mmap_preload -- + * __wt_win_map_preload -- * Cause a section of a memory map to be faulted in. */ int -__wt_mmap_preload( +__wt_win_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) { WT_UNUSED(session); @@ -70,11 +70,11 @@ __wt_mmap_preload( } /* - * __wt_mmap_discard -- + * __wt_win_map_discard -- * Discard a chunk of the memory map. */ int -__wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) +__wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) { WT_UNUSED(session); WT_UNUSED(fh); @@ -85,11 +85,11 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) } /* - * __wt_munmap -- + * __wt_win_map_unmap -- * Remove a memory mapping. 
*/ int -__wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) +__wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) { WT_DECL_RET; diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 212fcd0238b..01962b990df 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -617,6 +617,10 @@ directory_open: fh->fh_close = __win_handle_close; fh->fh_getc = __win_handle_getc; fh->fh_lock = __win_handle_lock; + fh->fh_map = __wt_win_map; + fh->fh_map_discard = __wt_win_map_discard; + fh->fh_map_preload = __wt_win_map_preload; + fh->fh_map_unmap = __wt_win_map_unmap; fh->fh_printf = __win_handle_printf; fh->fh_read = __win_handle_read; fh->fh_size = __win_handle_size; -- cgit v1.2.1 From 3dfc368af0eca2bec8f7317f5cbc970390f5446f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 07:58:31 -0400 Subject: WT-2517: wtperf uses setvbuf in a way that isn't supported on Windows MSVC doesn't support a buffer size of 0 to setvbuf. The MSVC documentation says "Buffer size in bytes. Allowable range: 2 <= size <= INT_MAX (2147483647). Internally, the value supplied for size is rounded down to the nearest multiple of 2." --- bench/wtperf/wtperf.c | 2 +- src/btree/bt_debug.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 340c400ba7e..8d3c68e71fd 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1233,7 +1233,7 @@ monitor(void *arg) goto err; } /* Set line buffering for monitor file. */ - (void)setvbuf(fp, NULL, _IOLBF, 0); + (void)setvbuf(fp, NULL, _IOLBF, 1024); fprintf(fp, "#time," "totalsec," diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 7c7f8cab855..2abe6ee9205 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -100,7 +100,7 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile) /* If we're using a file, flush on each line. 
*/ WT_RET(__wt_fopen(session, ofile, WT_FHANDLE_WRITE, 0, &ds->fp)); - (void)setvbuf(ds->fp, NULL, _IOLBF, 0); + (void)setvbuf(ds->fp, NULL, _IOLBF, 1024); return (0); } -- cgit v1.2.1 From 49e31a32c84d071e919b0514206bc76bad3885b0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 07:59:34 -0400 Subject: WT-2517: wtperf uses setvbuf in a way that isn't supported on Windows Increase the line-buffer size for the monitor and stdout streams from 32B to 1024B; I don't understand why a 32B buffer would ever be useful. --- bench/wtperf/misc.c | 2 +- bench/wtperf/wtperf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/wtperf/misc.c b/bench/wtperf/misc.c index bdfd53d5295..98920f6ab64 100644 --- a/bench/wtperf/misc.c +++ b/bench/wtperf/misc.c @@ -54,7 +54,7 @@ setup_log_file(CONFIG *cfg) return (ret); /* Use line buffering for the log file. */ - (void)setvbuf(cfg->logf, NULL, _IOLBF, 32); + (void)setvbuf(cfg->logf, NULL, _IOLBF, 1024); return (0); } diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 8d3c68e71fd..2c0dee71096 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2313,7 +2313,7 @@ main(int argc, char *argv[]) cfg->table_name); /* Make stdout line buffered, so verbose output appears quickly. */ - (void)setvbuf(stdout, NULL, _IOLBF, 32); + (void)setvbuf(stdout, NULL, _IOLBF, 1024); /* Concatenate non-default configuration strings. */ if (cfg->verbose > 1 || user_cconfig != NULL || -- cgit v1.2.1 From 6b72f0ad34701675172c7fe869ca97201185f5a1 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 08:47:13 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Add WiredTiger helper functions to set line buffering and turn off buffering (wrappers for setvbuf), then add style tests so we never call setvbuf directly, to avoid re-introducing the problems in WT-2517.
Add a new WiredTiger open-handle flag, WT_STREAM_LINE_BUFFER, which allows the Btree debug code to set line-buffering on its debug stream. --- bench/wtperf/misc.c | 2 +- bench/wtperf/wtperf.c | 4 ++-- build_win/filelist.win | 1 + dist/filelist | 1 + dist/s_funcs.list | 1 + dist/s_string.ok | 1 + dist/s_style | 8 +++++++- src/btree/bt_debug.c | 4 ++-- src/include/extern.h | 2 ++ src/include/os.h | 1 + src/os_posix/os_posix.c | 10 +++++++--- src/os_posix/os_setvbuf.c | 34 ++++++++++++++++++++++++++++++++++ src/os_win/os_win.c | 2 ++ test/recovery/random-abort.c | 2 +- test/recovery/truncated-log.c | 2 +- 15 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 src/os_posix/os_setvbuf.c diff --git a/bench/wtperf/misc.c b/bench/wtperf/misc.c index 98920f6ab64..2821216f240 100644 --- a/bench/wtperf/misc.c +++ b/bench/wtperf/misc.c @@ -54,7 +54,7 @@ setup_log_file(CONFIG *cfg) return (ret); /* Use line buffering for the log file. */ - (void)setvbuf(cfg->logf, NULL, _IOLBF, 1024); + __wt_stream_set_line_buffer(cfg->logf); return (0); } diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 2c0dee71096..9d57bdcf6b0 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1233,7 +1233,7 @@ monitor(void *arg) goto err; } /* Set line buffering for monitor file. */ - (void)setvbuf(fp, NULL, _IOLBF, 1024); + __wt_stream_set_line_buffer(fp); fprintf(fp, "#time," "totalsec," @@ -2313,7 +2313,7 @@ main(int argc, char *argv[]) cfg->table_name); /* Make stdout line buffered, so verbose output appears quickly. */ - (void)setvbuf(stdout, NULL, _IOLBF, 1024); + __wt_stream_set_line_buffer(stdout); /* Concatenate non-default configuration strings. 
*/ if (cfg->verbose > 1 || user_cconfig != NULL || diff --git a/build_win/filelist.win b/build_win/filelist.win index 3e6bcf7f204..6fc20f67c87 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -109,6 +109,7 @@ src/os_posix/os_getopt.c src/os_posix/os_init.c src/os_posix/os_inmemory.c src/os_posix/os_open.c +src/os_posix/os_setvbuf.c src/os_posix/os_stdio.c src/os_posix/os_strtouq.c src/os_win/os_dir.c diff --git a/dist/filelist b/dist/filelist index 4b2bf3439ee..17179c6935f 100644 --- a/dist/filelist +++ b/dist/filelist @@ -120,6 +120,7 @@ src/os_posix/os_pagesize.c src/os_posix/os_path.c src/os_posix/os_posix.c src/os_posix/os_priv.c +src/os_posix/os_setvbuf.c src/os_posix/os_sleep.c src/os_posix/os_stdio.c src/os_posix/os_strtouq.c diff --git a/dist/s_funcs.list b/dist/s_funcs.list index d13f45ab4da..c0d9f2e688f 100644 --- a/dist/s_funcs.list +++ b/dist/s_funcs.list @@ -30,6 +30,7 @@ __wt_nlpo2_round __wt_print_huffman_code __wt_stat_join_aggregate __wt_stat_join_clear_all +__wt_stream_set_no_buffer __wt_try_readlock wiredtiger_config_parser_open wiredtiger_config_validate diff --git a/dist/s_string.ok b/dist/s_string.ok index 018654d46bc..882a1f78d7b 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -967,6 +967,7 @@ sessionp setkv setstr setv +setvbuf sfence sii sizeof diff --git a/dist/s_style b/dist/s_style index 78fb7a6eb03..c6b338076dd 100755 --- a/dist/s_style +++ b/dist/s_style @@ -64,7 +64,7 @@ else ! expr "$f" : 'src/os_win/.*' > /dev/null && ! expr "$f" : 'src/include/extern.h' > /dev/null && ! expr "$f" : 'src/include/os.h' > /dev/null && - grep '__wt_errno' $f > $t; then + grep '__wt_errno' $f > $t; then echo "$f: upper-level code should not call __wt_errno" cat $t fi @@ -83,6 +83,12 @@ else cat $t } + if ! expr "$f" : 'src/os_posix/os_setvbuf.c' > /dev/null && + egrep -w 'setvbuf' $f > $t; then + echo "$f: setvbuf call, use WiredTiger library replacements" + cat $t + fi + # Alignment directive before "struct". 
egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t test -s $t && { diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 6a36912612f..8ce1463a0db 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -97,8 +97,8 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile) if (ofile == NULL) return (__wt_scr_alloc(session, 512, &ds->msg)); - return (__wt_open(session, ofile, - WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_STREAM_WRITE, &ds->fh)); + return (__wt_open(session, ofile, WT_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_STREAM_LINE_BUFFER | WT_STREAM_WRITE, &ds->fh)); } /* diff --git a/src/include/extern.h b/src/include/extern.h index 48191fd974c..75656f1062b 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -776,5 +776,7 @@ extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session); extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern void __wt_stream_set_line_buffer(FILE *fp); +extern void __wt_stream_set_no_buffer(FILE *fp); extern void __wt_thread_id(char *buf, size_t buflen); extern void __wt_yield(void); diff --git a/src/include/os.h b/src/include/os.h index a3637c43fc3..92d469ff2a7 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -63,6 +63,7 @@ #define WT_OPEN_FIXED 0x004 /* Path isn't relative to home */ #define WT_OPEN_READONLY 0x008 /* Readonly open */ #define WT_STREAM_APPEND 0x010 /* Open a stream: append */ +#define WT_STREAM_LINE_BUFFER 0x010 /* Line buffer the stream */ #define WT_STREAM_READ 0x020 /* Open a stream: read */ #define WT_STREAM_WRITE 0x040 /* Open a stream: write */ diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c index 0fdb0d1fff7..2d450b1df7f 100644 --- a/src/os_posix/os_posix.c +++ b/src/os_posix/os_posix.c @@ -653,9 +653,13 @@ __posix_handle_open(WT_SESSION_IMPL *session, stream_mode = 
NULL; break; } - if (stream_mode != NULL && (fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: fdopen", name); + if (stream_mode != NULL) { + if ((fh->fp = fdopen(fd, stream_mode)) == NULL) + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fdopen", name); + if (LF_ISSET(WT_STREAM_LINE_BUFFER)) + __wt_stream_set_line_buffer(fh->fp); + } directory_open: fh->fd = fd; diff --git a/src/os_posix/os_setvbuf.c b/src/os_posix/os_setvbuf.c new file mode 100644 index 00000000000..d6107115eb3 --- /dev/null +++ b/src/os_posix/os_setvbuf.c @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_stream_set_line_buffer -- + * Set line buffering on a stream. + */ +void +__wt_stream_set_line_buffer(FILE *fp) +{ + /* + * This function exists because MSVC doesn't support buffer sizes of 0 + * to the setvbuf call. To avoid re-introducing the bug, we have helper + * functions and disallow calling setvbuf directly in WiredTiger code. + */ + (void)setvbuf(fp, NULL, _IOLBF, 1024); +} + +/* + * __wt_stream_set_no_buffer -- + * Turn off buffering on a stream. + */ +void +__wt_stream_set_no_buffer(FILE *fp) +{ + (void)setvbuf(fp, NULL, _IONBF, 0); +} diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c index 01962b990df..b82845771eb 100644 --- a/src/os_win/os_win.c +++ b/src/os_win/os_win.c @@ -603,6 +603,8 @@ __win_handle_open(WT_SESSION_IMPL *session, if ((fh->fp = fdopen(fd, stream_mode)) == NULL) WT_ERR_MSG(session, __wt_errno(), "%s: handle-open: fdopen", name); + if (LF_ISSET(WT_STREAM_LINE_BUFFER)) + __wt_stream_set_line_buffer(fh->fp); } /* Configure fallocate/posix_fallocate calls. 
*/ diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index f9c3ed28814..cd7d1b08708 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -110,7 +110,7 @@ fill_db(void) /* * Set to no buffering. */ - (void)setvbuf(fp, NULL, _IONBF, 0); + __wt_stream_set_no_buffer(fp); /* * Write data into the table until we are killed by the parent. diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c index 67fdb932c27..e099873e5b9 100644 --- a/test/recovery/truncated-log.c +++ b/test/recovery/truncated-log.c @@ -107,7 +107,7 @@ fill_db(void) /* * Set to no buffering. */ - (void)setvbuf(fp, NULL, _IONBF, 0); + __wt_stream_set_no_buffer(fp); save_lsn.l.file = 0; /* -- cgit v1.2.1 From 7f070a727ed68ca6a4239ccd412a2a786824a6ad Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 09:32:52 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Create os_common, move common OS functions there. Add "fs" to the file names of files that contain a version of the file system APIs. 
--- build_win/filelist.win | 25 +- dist/filelist | 24 +- dist/s_style | 5 +- dist/s_win | 43 ++- src/include/extern.h | 12 +- src/os_common/filename.c | 192 +++++++++++ src/os_common/os_abort.c | 27 ++ src/os_common/os_alloc.c | 308 +++++++++++++++++ src/os_common/os_fhandle.c | 318 ++++++++++++++++++ src/os_common/os_fs_inmemory.c | 466 ++++++++++++++++++++++++++ src/os_common/os_fs_stdio.c | 236 +++++++++++++ src/os_common/os_getline.c | 51 +++ src/os_common/os_getopt.c | 151 +++++++++ src/os_common/os_init.c | 41 +++ src/os_common/os_setvbuf.c | 34 ++ src/os_common/os_strtouq.c | 25 ++ src/os_posix/os_abort.c | 27 -- src/os_posix/os_alloc.c | 308 ----------------- src/os_posix/os_fs_posix.c | 729 +++++++++++++++++++++++++++++++++++++++++ src/os_posix/os_getline.c | 51 --- src/os_posix/os_getopt.c | 151 --------- src/os_posix/os_init.c | 41 --- src/os_posix/os_inmemory.c | 466 -------------------------- src/os_posix/os_open.c | 318 ------------------ src/os_posix/os_posix.c | 729 ----------------------------------------- src/os_posix/os_setvbuf.c | 34 -- src/os_posix/os_stdio.c | 236 ------------- src/os_posix/os_strtouq.c | 25 -- src/os_win/os_fs_win.c | 676 ++++++++++++++++++++++++++++++++++++++ src/os_win/os_win.c | 676 -------------------------------------- src/support/filename.c | 192 ----------- 31 files changed, 3308 insertions(+), 3309 deletions(-) create mode 100644 src/os_common/filename.c create mode 100644 src/os_common/os_abort.c create mode 100644 src/os_common/os_alloc.c create mode 100644 src/os_common/os_fhandle.c create mode 100644 src/os_common/os_fs_inmemory.c create mode 100644 src/os_common/os_fs_stdio.c create mode 100644 src/os_common/os_getline.c create mode 100644 src/os_common/os_getopt.c create mode 100644 src/os_common/os_init.c create mode 100644 src/os_common/os_setvbuf.c create mode 100644 src/os_common/os_strtouq.c delete mode 100644 src/os_posix/os_abort.c delete mode 100644 src/os_posix/os_alloc.c create mode 100644 
src/os_posix/os_fs_posix.c delete mode 100644 src/os_posix/os_getline.c delete mode 100644 src/os_posix/os_getopt.c delete mode 100644 src/os_posix/os_init.c delete mode 100644 src/os_posix/os_inmemory.c delete mode 100644 src/os_posix/os_open.c delete mode 100644 src/os_posix/os_posix.c delete mode 100644 src/os_posix/os_setvbuf.c delete mode 100644 src/os_posix/os_stdio.c delete mode 100644 src/os_posix/os_strtouq.c create mode 100644 src/os_win/os_fs_win.c delete mode 100644 src/os_win/os_win.c delete mode 100644 src/support/filename.c diff --git a/build_win/filelist.win b/build_win/filelist.win index 6fc20f67c87..323e45d0305 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -101,20 +101,21 @@ src/meta/meta_ext.c src/meta/meta_table.c src/meta/meta_track.c src/meta/meta_turtle.c -src/os_posix/os_abort.c -src/os_posix/os_alloc.c -src/os_posix/os_fallocate.c -src/os_posix/os_getline.c -src/os_posix/os_getopt.c -src/os_posix/os_init.c -src/os_posix/os_inmemory.c -src/os_posix/os_open.c -src/os_posix/os_setvbuf.c -src/os_posix/os_stdio.c -src/os_posix/os_strtouq.c +src/os_common/filename.c +src/os_common/os_abort.c +src/os_common/os_alloc.c +src/os_common/os_fhandle.c +src/os_common/os_fs_inmemory.c +src/os_common/os_fs_stdio.c +src/os_common/os_getline.c +src/os_common/os_getopt.c +src/os_common/os_init.c +src/os_common/os_setvbuf.c +src/os_common/os_strtouq.c src/os_win/os_dir.c src/os_win/os_dlopen.c src/os_win/os_errno.c +src/os_win/os_fs_win.c src/os_win/os_getenv.c src/os_win/os_map.c src/os_win/os_mtx_cond.c @@ -127,7 +128,6 @@ src/os_win/os_snprintf.c src/os_win/os_thread.c src/os_win/os_time.c src/os_win/os_vsnprintf.c -src/os_win/os_win.c src/os_win/os_yield.c src/packing/pack_api.c src/packing/pack_impl.c @@ -153,7 +153,6 @@ src/support/cksum.c src/support/cond_auto.c src/support/crypto.c src/support/err.c -src/support/filename.c src/support/global.c src/support/hash_city.c src/support/hash_fnv.c diff --git a/dist/filelist 
b/dist/filelist index 17179c6935f..2b229e5fa91 100644 --- a/dist/filelist +++ b/dist/filelist @@ -101,29 +101,30 @@ src/meta/meta_ext.c src/meta/meta_table.c src/meta/meta_track.c src/meta/meta_turtle.c -src/os_posix/os_abort.c -src/os_posix/os_alloc.c +src/os_common/filename.c +src/os_common/os_abort.c +src/os_common/os_alloc.c +src/os_common/os_fhandle.c +src/os_common/os_fs_inmemory.c +src/os_common/os_fs_stdio.c +src/os_common/os_getline.c +src/os_common/os_getopt.c +src/os_common/os_init.c +src/os_common/os_setvbuf.c +src/os_common/os_strtouq.c src/os_posix/os_dir.c src/os_posix/os_dlopen.c src/os_posix/os_errno.c src/os_posix/os_fallocate.c +src/os_posix/os_fs_posix.c src/os_posix/os_getenv.c -src/os_posix/os_getline.c -src/os_posix/os_getopt.c -src/os_posix/os_init.c -src/os_posix/os_inmemory.c src/os_posix/os_map.c src/os_posix/os_mtx_cond.c src/os_posix/os_once.c -src/os_posix/os_open.c src/os_posix/os_pagesize.c src/os_posix/os_path.c -src/os_posix/os_posix.c src/os_posix/os_priv.c -src/os_posix/os_setvbuf.c src/os_posix/os_sleep.c -src/os_posix/os_stdio.c -src/os_posix/os_strtouq.c src/os_posix/os_thread.c src/os_posix/os_time.c src/os_posix/os_yield.c @@ -151,7 +152,6 @@ src/support/cksum.c src/support/cond_auto.c src/support/crypto.c src/support/err.c -src/support/filename.c src/support/global.c src/support/hash_city.c src/support/hash_fnv.c diff --git a/dist/s_style b/dist/s_style index c6b338076dd..1222318e1ad 100755 --- a/dist/s_style +++ b/dist/s_style @@ -60,7 +60,8 @@ else echo "$f: use TAILQ for all lists" fi - if ! expr "$f" : 'src/os_posix/.*' > /dev/null && + if ! expr "$f" : 'src/os_common/.*' > /dev/null && + ! expr "$f" : 'src/os_posix/.*' > /dev/null && ! expr "$f" : 'src/os_win/.*' > /dev/null && ! expr "$f" : 'src/include/extern.h' > /dev/null && ! expr "$f" : 'src/include/os.h' > /dev/null && @@ -83,7 +84,7 @@ else cat $t } - if ! expr "$f" : 'src/os_posix/os_setvbuf.c' > /dev/null && + if ! 
expr "$f" : 'src/os_common/os_setvbuf.c' > /dev/null && egrep -w 'setvbuf' $f > $t; then echo "$f: setvbuf call, use WiredTiger library replacements" cat $t diff --git a/dist/s_win b/dist/s_win index 6127146dc70..24390b0a120 100755 --- a/dist/s_win +++ b/dist/s_win @@ -43,30 +43,29 @@ win_filelist() { f='../build_win/filelist.win' - # Process the files for which there's a Windows-specific version, then - # append Windows-only files and discard POSIX-only files. - (sed \ - -e 's;os_posix/os_dir.c;os_win/os_dir.c;' \ - -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \ - -e 's;os_posix/os_errno.c;os_win/os_errno.c;' \ - -e 's;os_posix/os_exist.c;os_win/os_exist.c;' \ - -e 's;os_posix/os_getenv.c;os_win/os_getenv.c;' \ - -e 's;os_posix/os_map.c;os_win/os_map.c;' \ - -e 's;os_posix/os_mtx_cond.c;os_win/os_mtx_cond.c;' \ - -e 's;os_posix/os_once.c;os_win/os_once.c;' \ - -e 's;os_posix/os_pagesize.c;os_win/os_pagesize.c;' \ - -e 's;os_posix/os_path.c;os_win/os_path.c;' \ - -e 's;os_posix/os_priv.c;os_win/os_priv.c;' \ - -e 's;os_posix/os_sleep.c;os_win/os_sleep.c;' \ - -e 's;os_posix/os_thread.c;os_win/os_thread.c;' \ - -e 's;os_posix/os_time.c;os_win/os_time.c;' \ - -e 's;os_posix/os_yield.c;os_win/os_yield.c;' \ - -e '/src\/os_posix\/os_posix.c/d' \ + # Discard POSIX-only and PPC-only files, add in Windows-only files. 
+ ( + sed \ + -e '/\/os_posix\//d' \ -e '/src\/support\/power8\/crc32.S/d' \ -e '/src\/support\/power8\/crc32_wrapper.c/d' - echo 'src/os_win/os_snprintf.c' - echo 'src/os_win/os_vsnprintf.c' - echo 'src/os_win/os_win.c') < filelist | sort > $t + echo 'src/os_win/os_dir.c' + echo 'src/os_win/os_dlopen.c' + echo 'src/os_win/os_errno.c' + echo 'src/os_win/os_fs_win.c' + echo 'src/os_win/os_getenv.c' + echo 'src/os_win/os_map.c' + echo 'src/os_win/os_mtx_cond.c' + echo 'src/os_win/os_once.c' + echo 'src/os_win/os_pagesize.c' + echo 'src/os_win/os_path.c' + echo 'src/os_win/os_priv.c' + echo 'src/os_win/os_sleep.c' + echo 'src/os_win/os_snprintf.c' + echo 'src/os_win/os_thread.c' + echo 'src/os_win/os_time.c' + echo 'src/os_win/os_vsnprintf.c' + echo 'src/os_win/os_yield.c') < filelist | sort > $t cmp $t $f > /dev/null 2>&1 || (echo "Building $f" && rm -f $f && cp $t $f) diff --git a/src/include/extern.h b/src/include/extern.h index 75656f1062b..f2b620570f1 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -604,12 +604,6 @@ extern int __wt_panic(WT_SESSION_IMPL *session); extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name); extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri); extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri); -extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); -extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); -extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); -extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); -extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); -extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to); extern int __wt_library_init(void); extern int __wt_breakpoint(void); extern void 
__wt_attach(WT_SESSION_IMPL *session); @@ -730,17 +724,20 @@ extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_s extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); +extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to); extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); extern int __wt_errno(void); +extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); extern int __wt_get_vm_pagesize(void); extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); extern int __wt_map_error_rdonly(int error); +extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); extern int __wt_once(void (*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp); extern int __wt_os_cleanup(WT_SESSION_IMPL *session); @@ -761,7 +758,10 @@ extern int __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t 
bytes_to_allocate, void *retp); extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); +extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); +extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); extern int __wt_win32_errno(void); diff --git a/src/os_common/filename.c b/src/os_common/filename.c new file mode 100644 index 00000000000..e8edceaa1e2 --- /dev/null +++ b/src/os_common/filename.c @@ -0,0 +1,192 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_filename -- + * Build a file name in a scratch buffer, automatically calculate the + * length of the file name. + */ +int +__wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) +{ + return (__wt_nfilename(session, name, strlen(name), path)); +} + +/* + * __wt_nfilename -- + * Build a file name in a scratch buffer. If the name is already an + * absolute path duplicate it, otherwise generate a path relative to the + * connection home directory. + * Needs to work with a NULL session handle - since this is called via + * the exists API which is used by the test utilities. 
+ */ +int +__wt_nfilename( + WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) +{ + size_t len; + char *buf; + + *path = NULL; + + if (session == NULL || __wt_absolute_path(name)) + WT_RET(__wt_strndup(session, name, namelen, path)); + else { + len = strlen(S2C(session)->home) + 1 + namelen + 1; + WT_RET(__wt_calloc(session, 1, len, &buf)); + snprintf(buf, len, "%s%s%.*s", S2C(session)->home, + __wt_path_separator(), (int)namelen, name); + *path = buf; + } + + return (0); +} + +/* + * __wt_remove_if_exists -- + * Remove a file if it exists. + */ +int +__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name) +{ + bool exist; + + WT_RET(__wt_exist(session, name, &exist)); + if (exist) + WT_RET(__wt_remove(session, name)); + return (0); +} + +/* + * __wt_rename_and_sync_directory -- + * Rename a file and sync the enclosing directory. + */ +int +__wt_rename_and_sync_directory( + WT_SESSION_IMPL *session, const char *from, const char *to) +{ + const char *fp, *tp; + bool same_directory; + + /* Rename the source file to the target. */ + WT_RET(__wt_rename(session, from, to)); + + /* + * Flush the backing directory to guarantee the rename. My reading of + * POSIX 1003.1 is there's no guarantee flushing only one of the from + * or to directories, or flushing a common parent, is sufficient, and + * even if POSIX were to make that guarantee, existing filesystems are + * known to not provide the guarantee or only provide the guarantee + * with specific mount options. Flush both of the from/to directories + * until it's a performance problem. + */ + WT_RET(__wt_directory_sync(session, from)); + + /* + * In almost all cases, we're going to be renaming files in the same + * directory, we can at least fast-path that. 
+ */ + fp = strrchr(from, '/'); + tp = strrchr(to, '/'); + same_directory = (fp == NULL && tp == NULL) || + (fp != NULL && tp != NULL && + fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0); + + return (same_directory ? 0 : __wt_directory_sync(session, to)); +} + +/* + * __wt_sync_handle_and_rename -- + * Sync and close a handle, and swap it into place. + */ +int +__wt_sync_handle_and_rename( + WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to) +{ + WT_DECL_RET; + WT_FH *fh; + + fh = *fhp; + *fhp = NULL; + + /* Flush to disk and close the handle. */ + ret = __wt_fsync(session, fh, true); + WT_TRET(__wt_close(session, &fh)); + WT_RET(ret); + + return (__wt_rename_and_sync_directory(session, from, to)); +} + +/* + * __wt_copy_and_sync -- + * Copy a file safely; here to support the wt utility. + */ +int +__wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) +{ + WT_DECL_ITEM(tmp); + WT_DECL_RET; + WT_FH *ffh, *tfh; + WT_SESSION_IMPL *session; + size_t n; + wt_off_t offset, size; + char *buf; + + session = (WT_SESSION_IMPL *)wt_session; + ffh = tfh = NULL; + buf = NULL; + + /* + * Remove the target file if it exists, then create a temporary file, + * copy the original into it and rename it into place. I don't think + * it's necessary to remove the file, or create a copy and do a rename, + * it's likely safe to overwrite the backup file directly. I'm doing + * the remove and rename to insulate us from errors in other programs + * that might not detect a corrupted backup file; it's cheap insurance + * in a path where undetected failure is very bad. + */ + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to)); + + WT_ERR(__wt_remove_if_exists(session, to)); + WT_ERR(__wt_remove_if_exists(session, tmp->data)); + + /* Open the from and temporary file handles.
*/ + WT_ERR(__wt_open(session, from, + WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &ffh)); + WT_ERR(__wt_open(session, tmp->data, + WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh)); + + /* + * Allocate a copy buffer. Don't use a scratch buffer, this thing is + * big, and we don't want it hanging around. + */ +#define WT_BACKUP_COPY_SIZE (128 * 1024) + WT_ERR(__wt_malloc(session, WT_BACKUP_COPY_SIZE, &buf)); + + /* Get the file's size, then copy the bytes. */ + WT_ERR(__wt_filesize(session, ffh, &size)); + for (offset = 0; size > 0; size -= n, offset += n) { + n = (size_t)WT_MIN(size, WT_BACKUP_COPY_SIZE); + WT_ERR(__wt_read(session, ffh, offset, n, buf)); + WT_ERR(__wt_write(session, tfh, offset, n, buf)); + } + + /* Close the from handle, then swap the temporary file into place. */ + WT_ERR(__wt_close(session, &ffh)); + ret = __wt_sync_handle_and_rename(session, &tfh, tmp->data, to); + +err: WT_TRET(__wt_close(session, &ffh)); + WT_TRET(__wt_close(session, &tfh)); + + __wt_free(session, buf); + __wt_scr_free(session, &tmp); + return (ret); +} diff --git a/src/os_common/os_abort.c b/src/os_common/os_abort.c new file mode 100644 index 00000000000..034eedcfbf8 --- /dev/null +++ b/src/os_common/os_abort.c @@ -0,0 +1,27 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_abort -- + * Abort the process, dropping core. + */ +void +__wt_abort(WT_SESSION_IMPL *session) + WT_GCC_FUNC_ATTRIBUTE((noreturn)) +{ + __wt_errx(session, "aborting WiredTiger library"); + +#ifdef HAVE_DIAGNOSTIC + __wt_attach(session); +#endif + + abort(); + /* NOTREACHED */ +} diff --git a/src/os_common/os_alloc.c b/src/os_common/os_alloc.c new file mode 100644 index 00000000000..cfc7b80450e --- /dev/null +++ b/src/os_common/os_alloc.c @@ -0,0 +1,308 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. 
+ * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * On systems with poor default allocators for allocations greater than 16 KB, + * we provide an option to use TCMalloc explicitly. + * This is important on Windows which does not have a builtin mechanism + * to replace C run-time memory management functions with alternatives. + */ +#ifdef HAVE_LIBTCMALLOC +#include <gperftools/tcmalloc.h> + +#define calloc tc_calloc +#define malloc tc_malloc +#define realloc tc_realloc +#define posix_memalign tc_posix_memalign +#define free tc_free +#endif + +/* + * __wt_calloc -- + * ANSI calloc function. + */ +int +__wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp) +{ + void *p; + + /* + * Defensive: if our caller doesn't handle errors correctly, ensure a + * free won't fail. + */ + *(void **)retp = NULL; + + /* + * !!! + * This function MUST handle a NULL WT_SESSION_IMPL handle. + */ + WT_ASSERT(session, number != 0 && size != 0); + + if (session != NULL) + WT_STAT_FAST_CONN_INCR(session, memory_allocation); + + if ((p = calloc(number, size)) == NULL) + WT_RET_MSG(session, __wt_errno(), + "memory allocation of %" WT_SIZET_FMT " bytes failed", + size * number); + + *(void **)retp = p; + return (0); +} + +/* + * __wt_malloc -- + * ANSI malloc function. + */ +int +__wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp) +{ + void *p; + + /* + * Defensive: if our caller doesn't handle errors correctly, ensure a + * free won't fail. + */ + *(void **)retp = NULL; + + /* + * !!! + * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ */ + WT_ASSERT(session, bytes_to_allocate != 0); + + if (session != NULL) + WT_STAT_FAST_CONN_INCR(session, memory_allocation); + + if ((p = malloc(bytes_to_allocate)) == NULL) + WT_RET_MSG(session, __wt_errno(), + "memory allocation of %" WT_SIZET_FMT " bytes failed", + bytes_to_allocate); + + *(void **)retp = p; + return (0); +} + +/* + * __realloc_func -- + * ANSI realloc function. + */ +static int +__realloc_func(WT_SESSION_IMPL *session, + size_t *bytes_allocated_ret, size_t bytes_to_allocate, bool clear_memory, + void *retp) +{ + void *p; + size_t bytes_allocated; + + /* + * !!! + * This function MUST handle a NULL WT_SESSION_IMPL handle. + * + * Sometimes we're allocating memory and we don't care about the + * final length -- bytes_allocated_ret may be NULL. + */ + p = *(void **)retp; + bytes_allocated = + (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret; + WT_ASSERT(session, + (p == NULL && bytes_allocated == 0) || + (p != NULL && + (bytes_allocated_ret == NULL || bytes_allocated != 0))); + WT_ASSERT(session, bytes_to_allocate != 0); + WT_ASSERT(session, bytes_allocated < bytes_to_allocate); + + if (session != NULL) { + if (p == NULL) + WT_STAT_FAST_CONN_INCR(session, memory_allocation); + else + WT_STAT_FAST_CONN_INCR(session, memory_grow); + } + + if ((p = realloc(p, bytes_to_allocate)) == NULL) + WT_RET_MSG(session, __wt_errno(), + "memory allocation of %" WT_SIZET_FMT " bytes failed", + bytes_to_allocate); + + /* + * Clear the allocated memory, parts of WiredTiger depend on allocated + * memory being cleared. + */ + if (clear_memory) + memset((uint8_t *)p + bytes_allocated, + 0, bytes_to_allocate - bytes_allocated); + + /* Update caller's bytes allocated value. */ + if (bytes_allocated_ret != NULL) + *bytes_allocated_ret = bytes_to_allocate; + + *(void **)retp = p; + return (0); +} + +/* + * __wt_realloc -- + * WiredTiger's realloc API. 
+ */ +int +__wt_realloc(WT_SESSION_IMPL *session, + size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) +{ + return (__realloc_func( + session, bytes_allocated_ret, bytes_to_allocate, true, retp)); +} + +/* + * __wt_realloc_noclear -- + * WiredTiger's realloc API, not clearing allocated memory. + */ +int +__wt_realloc_noclear(WT_SESSION_IMPL *session, + size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) +{ + return (__realloc_func( + session, bytes_allocated_ret, bytes_to_allocate, false, retp)); +} + +/* + * __wt_realloc_aligned -- + * ANSI realloc function that aligns to buffer boundaries, configured with + * the "buffer_alignment" key to wiredtiger_open. + */ +int +__wt_realloc_aligned(WT_SESSION_IMPL *session, + size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) +{ +#if defined(HAVE_POSIX_MEMALIGN) + WT_DECL_RET; + + /* + * !!! + * This function MUST handle a NULL WT_SESSION_IMPL handle. + */ + if (session != NULL && S2C(session)->buffer_alignment > 0) { + void *p, *newp; + size_t bytes_allocated; + + /* + * Sometimes we're allocating memory and we don't care about the + * final length -- bytes_allocated_ret may be NULL. + */ + p = *(void **)retp; + bytes_allocated = + (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret; + WT_ASSERT(session, + (p == NULL && bytes_allocated == 0) || + (p != NULL && + (bytes_allocated_ret == NULL || bytes_allocated != 0))); + WT_ASSERT(session, bytes_to_allocate != 0); + WT_ASSERT(session, bytes_allocated < bytes_to_allocate); + + /* + * We are going to allocate an aligned buffer. When we do this + * repeatedly, the allocator is expected to start on a boundary + * each time, account for that additional space by never asking + * for less than a full alignment size. The primary use case + * for aligned buffers is Linux direct I/O, which requires that + * the size be a multiple of the alignment anyway. 
+ */ + bytes_to_allocate = + WT_ALIGN(bytes_to_allocate, S2C(session)->buffer_alignment); + + WT_STAT_FAST_CONN_INCR(session, memory_allocation); + + if ((ret = posix_memalign(&newp, + S2C(session)->buffer_alignment, + bytes_to_allocate)) != 0) + WT_RET_MSG(session, ret, + "memory allocation of %" WT_SIZET_FMT + " bytes failed", bytes_to_allocate); + + if (p != NULL) + memcpy(newp, p, bytes_allocated); + __wt_free(session, p); + p = newp; + + /* Update caller's bytes allocated value. */ + if (bytes_allocated_ret != NULL) + *bytes_allocated_ret = bytes_to_allocate; + + *(void **)retp = p; + return (0); + } +#endif + /* + * If there is no posix_memalign function, or no alignment configured, + * fall back to realloc. + * + * Windows note: Visual C CRT memalign does not match POSIX behavior + * and would also double each allocation so it is bad for memory use. + */ + return (__realloc_func( + session, bytes_allocated_ret, bytes_to_allocate, false, retp)); +} + +/* + * __wt_strndup -- + * Duplicate a byte string of a given length (and NUL-terminate). + */ +int +__wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) +{ + void *p; + + if (str == NULL) { + *(void **)retp = NULL; + return (0); + } + + WT_RET(__wt_malloc(session, len + 1, &p)); + + /* + * Don't change this to strncpy, we rely on this function to duplicate + * "strings" that contain nul bytes. + */ + memcpy(p, str, len); + ((uint8_t *)p)[len] = '\0'; + + *(void **)retp = p; + return (0); +} + +/* + * __wt_free_int -- + * ANSI free function. + */ +void +__wt_free_int(WT_SESSION_IMPL *session, const void *p_arg) +{ + void *p; + + p = *(void **)p_arg; + if (p == NULL) /* ANSI C free semantics */ + return; + + /* + * If there's a serialization bug we might race with another thread. + * We can't avoid the race (and we aren't willing to flush memory), + * but we minimize the window by clearing the free address, hoping a + * racing thread will see, and won't free, a NULL pointer. 
+ */ + *(void **)p_arg = NULL; + + /* + * !!! + * This function MUST handle a NULL WT_SESSION_IMPL handle. + */ + if (session != NULL) + WT_STAT_FAST_CONN_INCR(session, memory_free); + + free(p); +} diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c new file mode 100644 index 00000000000..9013d67d22b --- /dev/null +++ b/src/os_common/os_fhandle.c @@ -0,0 +1,318 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_handle_search -- + * Search for a matching handle. + */ +bool +__wt_handle_search(WT_SESSION_IMPL *session, const char *name, + bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp) +{ + WT_CONNECTION_IMPL *conn; + WT_FH *fh; + uint64_t bucket, hash; + bool found; + + if (fhp != NULL) + *fhp = NULL; + + conn = S2C(session); + found = false; + + hash = __wt_hash_city64(name, strlen(name)); + bucket = hash % WT_HASH_ARRAY_SIZE; + + __wt_spin_lock(session, &conn->fh_lock); + + /* + * If we already have the file open, optionally increment the reference + * count and return a pointer. + */ + TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) + if (strcmp(name, fh->name) == 0) { + if (increment_ref) + ++fh->ref; + if (fhp != NULL) + *fhp = fh; + found = true; + break; + } + + /* If we don't find a match, optionally add a new entry. */ + if (!found && newfh != NULL) { + newfh->name_hash = hash; + WT_CONN_FILE_INSERT(conn, newfh, bucket); + (void)__wt_atomic_add32(&conn->open_file_count, 1); + + if (increment_ref) + ++newfh->ref; + if (fhp != NULL) + *fhp = newfh; + } + + /* + * Our caller may be operating on the handle itself, optionally leave + * the list locked. + */ + if (unlock) + __wt_spin_unlock(session, &conn->fh_lock); + + return (found); +} + +/* + * __wt_handle_search_unlock -- + * Release handle lock. 
+ */ +void +__wt_handle_search_unlock(WT_SESSION_IMPL *session) +{ + __wt_spin_unlock(session, &S2C(session)->fh_lock); +} + +/* + * __open_verbose -- + * Optionally output a verbose message on handle open. + */ +static inline int +__open_verbose(WT_SESSION_IMPL *session, + const char *name, uint32_t file_type, uint32_t flags) +{ +#ifdef HAVE_VERBOSE + if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS)) + return (0); + + /* + * It's useful to track file opens when debugging platforms, take some + * effort to output good tracking information. + */ + WT_DECL_RET; + WT_DECL_ITEM(tmp); + const char *file_type_tag, *sep; + + switch (file_type) { + case WT_FILE_TYPE_CHECKPOINT: + file_type_tag = "checkpoint"; + break; + case WT_FILE_TYPE_DATA: + file_type_tag = "data"; + break; + case WT_FILE_TYPE_DIRECTORY: + file_type_tag = "directory"; + break; + case WT_FILE_TYPE_LOG: + file_type_tag = "log"; + break; + case WT_FILE_TYPE_REGULAR: + file_type_tag = "regular"; + break; + default: + file_type_tag = "unknown open type"; + break; + } + + sep = ""; + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + +#define WT_OPEN_VERBOSE_FLAG(f, name) \ + if (LF_ISSET(f)) { \ + WT_ERR(__wt_buf_catfmt( \ + session, tmp, "%s%s", sep, name)); \ + sep = ","; \ + } + + WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create"); + WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive"); + WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed"); + WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly"); + WT_OPEN_VERBOSE_FLAG(WT_STREAM_APPEND, "stream-append"); + WT_OPEN_VERBOSE_FLAG(WT_STREAM_READ, "stream-read"); + WT_OPEN_VERBOSE_FLAG(WT_STREAM_WRITE, "stream-write"); + + ret = __wt_verbose(session, WT_VERB_FILEOPS, + "%s: handle-open: type %s, flags %s", + name, file_type_tag, (char *)tmp->data); + +err: __wt_scr_free(session, &tmp); + return (ret); +#else + WT_UNUSED(session); + WT_UNUSED(name); + WT_UNUSED(file_type); + WT_UNUSED(flags); + return (0); +#endif +} + +/* + * __wt_open -- + * Open a file handle. 
+ */ +int +__wt_open(WT_SESSION_IMPL *session, + const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_FH *fh; + bool lock_file, open_called; + char *path; + + WT_ASSERT(session, file_type != 0); /* A file type is required. */ + + conn = S2C(session); + fh = NULL; + open_called = false; + path = NULL; + + WT_RET(__open_verbose(session, name, file_type, flags)); + + /* Check if the handle is already open. */ + if (__wt_handle_search(session, name, true, true, NULL, &fh)) { + /* + * XXX + * The in-memory implementation has to reset the file offset + * when a file is re-opened (which obviously also depends on + * in-memory configurations never opening a file in more than + * one thread at a time). This needs to be fixed. + */ + if (F_ISSET(fh, WT_FH_IN_MEMORY) && fh->ref == 1) + fh->off = 0; + *fhp = fh; + return (0); + } + + /* Allocate a structure and set the name. */ + WT_ERR(__wt_calloc_one(session, &fh)); + WT_ERR(__wt_strdup(session, name, &fh->name)); + + /* + * If this is a read-only connection, open all files read-only except + * the lock file. + * + * The only file created in read-only mode is the lock file. + */ + if (F_ISSET(conn, WT_CONN_READONLY)) { + lock_file = strcmp(name, WT_SINGLETHREAD) == 0; + if (!lock_file) + LF_SET(WT_OPEN_READONLY); + WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE)); + } + + /* Create the path to the file. */ + if (!LF_ISSET(WT_OPEN_FIXED)) + WT_ERR(__wt_filename(session, name, &path)); + + /* Call the underlying open function. */ + WT_ERR(conn->handle_open( + session, fh, path == NULL ? name : path, file_type, flags)); + open_called = true; + + /* + * Repeat the check for a match: if there's no match, link our newly + * created handle onto the database's list of files. 
+ */ + if (__wt_handle_search(session, name, true, true, fh, fhp)) { +err: if (open_called) + WT_TRET(fh->fh_close(session, fh)); + if (fh != NULL) { + __wt_free(session, fh->name); + __wt_free(session, fh); + } + } + + __wt_free(session, path); + return (ret); +} + +/* + * __wt_close -- + * Close a file handle. + */ +int +__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_FH *fh; + uint64_t bucket; + + conn = S2C(session); + + if (*fhp == NULL) + return (0); + fh = *fhp; + *fhp = NULL; + + /* Track handle-close as a file operation, so open and close match. */ + WT_RET(__wt_verbose( + session, WT_VERB_FILEOPS, "%s: handle-close", fh->name)); + + /* + * If the reference count hasn't gone to 0, or if it's an in-memory + * object, we're done. + * + * Assert the reference count is correct, but don't let it wrap. + */ + __wt_spin_lock(session, &conn->fh_lock); + WT_ASSERT(session, fh->ref > 0); + if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) { + __wt_spin_unlock(session, &conn->fh_lock); + return (0); + } + + /* Remove from the list. */ + bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; + WT_CONN_FILE_REMOVE(conn, fh, bucket); + (void)__wt_atomic_sub32(&conn->open_file_count, 1); + + __wt_spin_unlock(session, &conn->fh_lock); + + /* Discard underlying resources. */ + ret = fh->fh_close(session, fh); + + __wt_free(session, fh->name); + __wt_free(session, fh); + + return (ret); +} + +/* + * __wt_close_connection_close -- + * Close any open file handles at connection close. + */ +int +__wt_close_connection_close(WT_SESSION_IMPL *session) +{ + WT_DECL_RET; + WT_FH *fh; + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + while ((fh = TAILQ_FIRST(&conn->fhqh)) != NULL) { + /* + * In-memory configurations will have open files, but the ref + * counts should be zero. 
+ */ + if (!F_ISSET(conn, WT_CONN_IN_MEMORY) || fh->ref != 0) { + ret = EBUSY; + __wt_errx(session, + "Connection has open file handles: %s", fh->name); + } + + fh->ref = 1; + F_CLR(fh, WT_FH_IN_MEMORY); + + WT_TRET(__wt_close(session, &fh)); + } + return (ret); +} diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c new file mode 100644 index 00000000000..e79054e56ed --- /dev/null +++ b/src/os_common/os_fs_inmemory.c @@ -0,0 +1,466 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * In-memory information. + */ +typedef struct { + WT_SPINLOCK lock; +} WT_IM; + +/* + * __im_directory_list -- + * Get a list of files from a directory, in-memory version. + */ +static int +__im_directory_list(WT_SESSION_IMPL *session, const char *dir, + const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) +{ + WT_UNUSED(session); + WT_UNUSED(dir); + WT_UNUSED(prefix); + WT_UNUSED(flags); + WT_UNUSED(dirlist); + WT_UNUSED(countp); + + WT_RET_MSG(session, ENOTSUP, "directory-list"); +} + +/* + * __im_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static int +__im_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +} + +/* + * __im_file_exist -- + * Return if the file exists. + */ +static int +__im_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + *existp = __wt_handle_search(session, name, false, true, NULL, NULL); + return (0); +} + +/* + * __im_file_remove -- + * POSIX remove. + */ +static int +__im_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + WT_FH *fh; + + if (__wt_handle_search(session, name, true, true, NULL, &fh)) { + WT_ASSERT(session, fh->ref == 1); + + /* Force a discard of the handle. 
*/
+		F_CLR(fh, WT_FH_IN_MEMORY);
+		ret = __wt_close(session, &fh);
+	}
+	return (ret);
+}
+
+/*
+ * __im_file_rename --
+ *	POSIX rename, in-memory version: re-key the handle named "from" so it
+ *	is found under the name "to".
+ *
+ *	Returns 0 on success, EPERM if a handle named "to" is already on the
+ *	hash list, ENOENT if no handle named "from" is found, or the error
+ *	returned when copying the target name.
+ */
+static int
+__im_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_FH *fh;
+	uint64_t bucket, hash;
+	char *to_name;
+
+	conn = S2C(session);
+
+	/* We'll need a copy of the target name. */
+	WT_RET(__wt_strdup(session, to, &to_name));
+
+	__wt_spin_lock(session, &conn->fh_lock);
+
+	/* Make sure the target name isn't active. */
+	hash = __wt_hash_city64(to, strlen(to));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+		if (strcmp(to, fh->name) == 0)
+			WT_ERR(EPERM);
+
+	/* Find the source name. */
+	hash = __wt_hash_city64(from, strlen(from));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+		if (strcmp(from, fh->name) == 0)
+			break;
+	if (fh == NULL)
+		WT_ERR(ENOENT);
+
+	/* Remove source from the list. */
+	WT_CONN_FILE_REMOVE(conn, fh, bucket);
+
+	/* Swap the names; the handle now owns the copy. */
+	__wt_free(session, fh->name);
+	fh->name = to_name;
+	to_name = NULL;
+
+	/*
+	 * Put source back on the list under the new name's bucket, and record
+	 * the new hash in the handle: __wt_close selects the bucket to unlink
+	 * from with fh->name_hash, so a stale value would unlink the handle
+	 * from the wrong hash list.
+	 */
+	hash = __wt_hash_city64(to, strlen(to));
+	fh->name_hash = hash;
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	WT_CONN_FILE_INSERT(conn, fh, bucket);
+
+	if (0) {
+err:		__wt_free(session, to_name);
+	}
+	__wt_spin_unlock(session, &conn->fh_lock);
+
+	return (ret);
+}
+
+/*
+ * __im_file_size --
+ *	Get the size of a file in bytes, by file name. 
+ */
+static int
+__im_file_size(
+    WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+{
+	WT_DECL_RET;
+	WT_FH *fh;
+	WT_IM *im;
+
+	WT_UNUSED(silent);
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	if (__wt_handle_search(session, name, false, false, NULL, &fh)) {
+		*sizep = (wt_off_t)fh->buf.size;
+		__wt_handle_search_unlock(session);
+	} else
+		ret = ENOENT;
+
+	__wt_spin_unlock(session, &im->lock);
+	return (ret);
+}
+
+/*
+ * __im_handle_advise --
+ *	POSIX fadvise.
+ */
+static int
+__im_handle_advise(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(fh);
+	WT_UNUSED(offset);
+	WT_UNUSED(len);
+	WT_UNUSED(advice);
+	return (ENOTSUP);
+}
+
+/*
+ * __im_handle_close --
+ *	ANSI C close/fclose.
+ */
+static int
+__im_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+	__wt_buf_free(session, &fh->buf);
+
+	return (0);
+}
+
+/*
+ * __im_handle_getc --
+ *	ANSI C fgetc: store the byte at the handle's current offset in *chp
+ *	and advance the offset, or store EOF once the offset reaches the end
+ *	of the handle's data. Always returns 0.
+ */
+static int
+__im_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+{
+	WT_IM *im;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	/*
+	 * Read the byte as an unsigned char, as fgetc does: where plain char
+	 * is signed, a 0xff data byte would otherwise be returned as -1 and
+	 * be indistinguishable from EOF.
+	 */
+	if (fh->off >= fh->buf.size)
+		*chp = EOF;
+	else
+		*chp = ((const unsigned char *)fh->buf.data)[fh->off++];
+
+	__wt_spin_unlock(session, &im->lock);
+	return (0);
+}
+
+/*
+ * __im_handle_lock --
+ *	Lock/unlock a file.
+ */
+static int
+__im_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(fh);
+	WT_UNUSED(lock);
+	return (0);
+}
+
+/*
+ * __im_handle_printf --
+ *	ANSI C vfprintf.
+ */
+static int
+__im_handle_printf(
+    WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+	va_list ap_copy;
+	WT_DECL_ITEM(tmp);
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t len;
+
+	im = S2C(session)->inmemory;
+
+	/* Build the string we're writing. 
*/
+	WT_RET(__wt_scr_alloc(session, strlen(fmt) * 2 + 128, &tmp));
+	for (;;) {
+		/*
+		 * Format from a copy so the caller's argument list can be
+		 * reused if the scratch buffer has to grow; every va_copy
+		 * needs a matching va_end.
+		 */
+		va_copy(ap_copy, ap);
+		len = (size_t)vsnprintf(tmp->mem, tmp->memsize, fmt, ap_copy);
+		va_end(ap_copy);
+		if (len < tmp->memsize) {
+			tmp->data = tmp->mem;
+			tmp->size = len;
+			break;
+		}
+		/* The lock isn't held here, the error path must not unlock. */
+		WT_ERR(__wt_buf_extend(session, tmp, len + 1));
+	}
+
+	__wt_spin_lock(session, &im->lock);
+
+	/* Grow the handle's buffer as necessary. */
+	if ((ret = __wt_buf_grow(session, &fh->buf, fh->off + len)) == 0) {
+		/* Copy the data into place and update the offset. */
+		memcpy((uint8_t *)fh->buf.mem + fh->off, tmp->data, len);
+		fh->off += len;
+
+		/*
+		 * Extend the data length the same way the handle's write
+		 * function does; the getc, read and size functions all bound
+		 * themselves by fh->buf.size.
+		 * NOTE(review): assumes __wt_buf_grow leaves buf.size alone,
+		 * as the write function's explicit update implies -- confirm.
+		 */
+		if (fh->off > fh->buf.size)
+			fh->buf.size = fh->off;
+	}
+
+	__wt_spin_unlock(session, &im->lock);
+
+err:	__wt_scr_free(session, &tmp);
+	return (ret);
+}
+
+/*
+ * __im_handle_read --
+ *	POSIX pread.
+ */
+static int
+__im_handle_read(
+    WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t off;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	off = (size_t)offset;
+	if (off < fh->buf.size) {
+		len = WT_MIN(len, fh->buf.size - off);
+		memcpy(buf, (uint8_t *)fh->buf.mem + off, len);
+		fh->off = off + len;
+	} else
+		ret = WT_ERROR;
+
+	__wt_spin_unlock(session, &im->lock);
+	if (ret == 0)
+		return (0);
+	WT_RET_MSG(session, WT_ERROR,
+	    "%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at "
+	    "offset %" WT_SIZET_FMT,
+	    fh->name, len, off);
+}
+
+/*
+ * __im_handle_size --
+ *	Get the size of a file in bytes, by file handle.
+ */
+static int
+__im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+	WT_UNUSED(session);
+
+	*sizep = (wt_off_t)fh->buf.size;
+	return (0);
+}
+
+/*
+ * __im_handle_sync --
+ *	POSIX fflush/fsync.
+ */
+static int
+__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(fh);
+
+	/*
+	 * Callers attempting asynchronous flush handle ENOTSUP returns, and
+	 * won't make further attempts.
+	 */
+	return (block ? 
0 : ENOTSUP);
+}
+
+/*
+ * __im_handle_truncate --
+ *	POSIX ftruncate: set the in-memory file's length to len bytes.
+ *
+ *	Returns 0 on success or the error from growing the buffer.
+ */
+static int
+__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t clear_from, new_size;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	new_size = (size_t)len;
+	WT_ERR(__wt_buf_grow(session, &fh->buf, new_size));
+
+	/*
+	 * Clear everything past the shorter of the old and new lengths, so
+	 * both newly exposed bytes and bytes cut off by a shrink are zero.
+	 */
+	clear_from = WT_MIN(fh->buf.size, new_size);
+	if (fh->buf.memsize > clear_from)
+		memset((uint8_t *)fh->buf.mem + clear_from,
+		    0, fh->buf.memsize - clear_from);
+
+	/*
+	 * Reset the data length: the size functions report fh->buf.size, and
+	 * without this update a truncate never changed the file's length.
+	 * NOTE(review): assumes __wt_buf_grow leaves buf.size alone, as the
+	 * write function's explicit update implies -- confirm.
+	 */
+	fh->buf.size = new_size;
+
+err:	__wt_spin_unlock(session, &im->lock);
+	return (ret);
+}
+
+/*
+ * __im_handle_write --
+ *	POSIX pwrite.
+ */
+static int
+__im_handle_write(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t off;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	off = (size_t)offset;
+	WT_ERR(__wt_buf_grow(session, &fh->buf, off + len + 1024));
+
+	memcpy((uint8_t *)fh->buf.data + off, buf, len);
+	if (off + len > fh->buf.size)
+		fh->buf.size = off + len;
+	fh->off = off + len;
+
+err:	__wt_spin_unlock(session, &im->lock);
+	if (ret == 0)
+		return (0);
+	WT_RET_MSG(session, ret,
+	    "%s: handle-write: failed to write %" WT_SIZET_FMT " bytes at "
+	    "offset %" WT_SIZET_FMT,
+	    fh->name, len, off);
+}
+
+/*
+ * __im_handle_open --
+ *	POSIX fopen/open.
+ */
+static int
+__im_handle_open(WT_SESSION_IMPL *session,
+    WT_FH *fh, const char *path, uint32_t file_type, uint32_t flags)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(path);
+	WT_UNUSED(file_type);
+	WT_UNUSED(flags);
+
+	fh->off = 0;
+	F_SET(fh, WT_FH_IN_MEMORY);
+
+	fh->fh_advise = __im_handle_advise;
+	fh->fh_close = __im_handle_close;
+	fh->fh_getc = __im_handle_getc;
+	fh->fh_lock = __im_handle_lock;
+	fh->fh_printf = __im_handle_printf;
+	fh->fh_read = __im_handle_read;
+	fh->fh_size = __im_handle_size;
+	fh->fh_sync = __im_handle_sync;
+	fh->fh_truncate = __im_handle_truncate;
+	fh->fh_write = __im_handle_write;
+
+	return (0);
+}
+
+/*
+ * __wt_os_inmemory --
+ *	Initialize an in-memory configuration. 
+ */ +int +__wt_os_inmemory(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_IM *im; + + conn = S2C(session); + im = NULL; + + /* Initialize the in-memory jump table. */ + conn->file_directory_list = __im_directory_list; + conn->file_directory_sync = __im_directory_sync; + conn->file_exist = __im_file_exist; + conn->file_remove = __im_file_remove; + conn->file_rename = __im_file_rename; + conn->file_size = __im_file_size; + conn->handle_open = __im_handle_open; + + /* Allocate an in-memory structure. */ + WT_RET(__wt_calloc_one(session, &im)); + WT_ERR(__wt_spin_init(session, &im->lock, "in-memory I/O")); + conn->inmemory = im; + + return (0); + +err: __wt_free(session, im); + return (ret); +} + +/* + * __wt_os_inmemory_cleanup -- + * Discard an in-memory configuration. + */ +int +__wt_os_inmemory_cleanup(WT_SESSION_IMPL *session) +{ + WT_DECL_RET; + WT_IM *im; + + if ((im = S2C(session)->inmemory) == NULL) + return (0); + S2C(session)->inmemory = NULL; + + __wt_spin_destroy(session, &im->lock); + __wt_free(session, im); + + return (ret); +} diff --git a/src/os_common/os_fs_stdio.c b/src/os_common/os_fs_stdio.c new file mode 100644 index 00000000000..5e3cd522bd6 --- /dev/null +++ b/src/os_common/os_fs_stdio.c @@ -0,0 +1,236 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __stdio_handle_advise -- + * POSIX fadvise. + */ +static int +__stdio_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + WT_RET_MSG(session, ENOTSUP, "%s: handle-advise", fh->name); +} + +/* + * __stdio_handle_allocate -- + * POSIX fallocate. 
+ */ +static int +__stdio_handle_allocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); +} + +/* + * __stdio_handle_close -- + * ANSI C close/fclose. + */ +static int +__stdio_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_RET_MSG(session, ENOTSUP, "%s: handle-close", fh->name); +} + +/* + * __stdio_handle_getc -- + * ANSI C fgetc. + */ +static int +__stdio_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + WT_UNUSED(chp); + WT_RET_MSG(session, ENOTSUP, "%s: handle-getc", fh->name); +} + +/* + * __stdio_handle_lock -- + * Lock/unlock a file. + */ +static int +__stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + WT_UNUSED(lock); + WT_RET_MSG(session, ENOTSUP, "%s: handle-lock", fh->name); +} + +/* + * __stdio_handle_map -- + * Map a file. + */ +static int +__stdio_handle_map(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t *lenp) +{ + WT_UNUSED(p); + WT_UNUSED(lenp); + WT_RET_MSG(session, ENOTSUP, "%s: handle-map", fh->name); +} + +/* + * __stdio_handle_map_discard -- + * Discard a section of a mapped region. + */ +static int +__stdio_handle_map_discard( + WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len) +{ + WT_UNUSED(p); + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-map-discard", fh->name); +} + +/* + * __stdio_handle_map_preload -- + * Preload a section of a mapped region. + */ +static int +__stdio_handle_map_preload( + WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t len) +{ + WT_UNUSED(p); + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-map-preload", fh->name); +} + +/* + * __stdio_handle_map_unmap -- + * Unmap a file. 
+ */ +static int +__stdio_handle_map_unmap( + WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len) +{ + WT_UNUSED(p); + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-map-unmap", fh->name); +} + +/* + * __stdio_handle_printf -- + * ANSI C vfprintf. + */ +static int +__stdio_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); +} + +/* + * __stdio_handle_read -- + * POSIX pread. + */ +static int +__stdio_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(buf); + WT_RET_MSG(session, ENOTSUP, "%s: handle-read", fh->name); +} + +/* + * __stdio_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__stdio_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + WT_UNUSED(sizep); + WT_RET_MSG(session, ENOTSUP, "%s: handle-size", fh->name); +} + +/* + * __stdio_handle_sync -- + * POSIX fflush/fsync. + */ +static int +__stdio_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + WT_UNUSED(block); + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); +} + +/* + * __stdio_handle_truncate -- + * POSIX ftruncate. + */ +static int +__stdio_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_UNUSED(len); + WT_RET_MSG(session, ENOTSUP, "%s: handle-truncate", fh->name); +} + +/* + * __stdio_handle_write -- + * POSIX pwrite. + */ +static int +__stdio_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(buf); + WT_RET_MSG(session, ENOTSUP, "%s: handle-write", fh->name); +} + +/* + * __stdio_func_init -- + * Initialize stdio functions. 
+ */ +static void +__stdio_func_init(WT_FH *fh, const char *name, FILE *fp) +{ + fh->name = name; + fh->fp = fp; + + fh->fh_advise = __stdio_handle_advise; + fh->fh_allocate = __stdio_handle_allocate; + fh->fh_close = __stdio_handle_close; + fh->fh_getc = __stdio_handle_getc; + fh->fh_lock = __stdio_handle_lock; + fh->fh_map = __stdio_handle_map; + fh->fh_map_discard = __stdio_handle_map_discard; + fh->fh_map_preload = __stdio_handle_map_preload; + fh->fh_map_unmap = __stdio_handle_map_unmap; + fh->fh_printf = __stdio_handle_printf; + fh->fh_read = __stdio_handle_read; + fh->fh_size = __stdio_handle_size; + fh->fh_sync = __stdio_handle_sync; + fh->fh_truncate = __stdio_handle_truncate; + fh->fh_write = __stdio_handle_write; +} + +/* + * __wt_os_stdio -- + * Initialize the stdio configuration. + */ +int +__wt_os_stdio(WT_SESSION_IMPL *session) +{ + __stdio_func_init(WT_STDERR(session), "stderr", stderr); + __stdio_func_init(WT_STDOUT(session), "stdout", stdout); + + return (0); +} diff --git a/src/os_common/os_getline.c b/src/os_common/os_getline.c new file mode 100644 index 00000000000..01e11581edf --- /dev/null +++ b/src/os_common/os_getline.c @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_getline -- + * Get a line from a stream. + * + * Implementation of the POSIX getline or BSD fgetln functions (finding the + * function in a portable way is hard, it's simple enough to write it instead). + * + * Note: Unlike the standard getline calls, this function doesn't include the + * trailing newline character in the returned buffer and discards empty lines + * (so the caller's EOF marker is a returned line length of 0). 
+ */ +int +__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh) +{ + int c; + + /* + * We always NUL-terminate the returned string (even if it's empty), + * make sure there's buffer space for a trailing NUL in all cases. + */ + WT_RET(__wt_buf_init(session, buf, 100)); + + for (;;) { + WT_RET(fh->fh_getc(session, fh, &c)); + if (c == EOF) + break; + + /* Leave space for a trailing NUL. */ + WT_RET(__wt_buf_extend(session, buf, buf->size + 2)); + if (c == '\n') { + if (buf->size == 0) + continue; + break; + } + ((char *)buf->mem)[buf->size++] = (char)c; + } + + ((char *)buf->mem)[buf->size] = '\0'; + + return (0); +} diff --git a/src/os_common/os_getopt.c b/src/os_common/os_getopt.c new file mode 100644 index 00000000000..0306ad1d79d --- /dev/null +++ b/src/os_common/os_getopt.c @@ -0,0 +1,151 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* $NetBSD: getopt.c,v 1.26 2003/08/07 16:43:40 agc Exp $ */ + +/* + * Copyright (c) 1987, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "wt_internal.h" + +extern int __wt_opterr, __wt_optind, __wt_optopt, __wt_optreset; +int __wt_opterr = 1, /* if error message should be printed */ + __wt_optind = 1, /* index into parent argv vector */ + __wt_optopt, /* character checked for validity */ + __wt_optreset; /* reset getopt */ + +extern char *__wt_optarg; +char *__wt_optarg; /* argument associated with option */ + +#define BADCH (int)'?' +#define BADARG (int)':' +#define EMSG "" + +/* + * __wt_getopt -- + * Parse argc/argv argument vector. + */ +int +__wt_getopt( + const char *progname, int nargc, char * const *nargv, const char *ostr) +{ + static const char *place = EMSG; /* option letter processing */ + const char *oli; /* option letter list index */ + + if (__wt_optreset || *place == 0) { /* update scanning pointer */ + __wt_optreset = 0; + place = nargv[__wt_optind]; + if (__wt_optind >= nargc || *place++ != '-') { + /* Argument is absent or is not an option */ + place = EMSG; + return (-1); + } + __wt_optopt = *place++; + if (__wt_optopt == '-' && *place == 0) { + /* "--" => end of options */ + ++__wt_optind; + place = EMSG; + return (-1); + } + if (__wt_optopt == 0) { + /* Solitary '-', treat as a '-' option + if the program (eg su) is looking for it. */ + place = EMSG; + if (strchr(ostr, '-') == NULL) + return (-1); + __wt_optopt = '-'; + } + } else + __wt_optopt = *place++; + + /* See if option letter is one the caller wanted... */ + if (__wt_optopt == ':' || (oli = strchr(ostr, __wt_optopt)) == NULL) { + if (*place == 0) + ++__wt_optind; + if (__wt_opterr && *ostr != ':') + (void)fprintf(stderr, + "%s: illegal option -- %c\n", progname, + __wt_optopt); + return (BADCH); + } + + /* Does this option need an argument? */ + if (oli[1] != ':') { + /* don't need argument */ + __wt_optarg = NULL; + if (*place == 0) + ++__wt_optind; + } else { + /* Option-argument is either the rest of this argument or the + entire next argument. 
*/ + if (*place) + __wt_optarg = (char *)place; + else if (nargc > ++__wt_optind) + __wt_optarg = nargv[__wt_optind]; + else { + /* option-argument absent */ + place = EMSG; + if (*ostr == ':') + return (BADARG); + if (__wt_opterr) + (void)fprintf(stderr, + "%s: option requires an argument -- %c\n", + progname, __wt_optopt); + return (BADCH); + } + place = EMSG; + ++__wt_optind; + } + return (__wt_optopt); /* return option letter */ +} diff --git a/src/os_common/os_init.c b/src/os_common/os_init.c new file mode 100644 index 00000000000..512216c52a5 --- /dev/null +++ b/src/os_common/os_init.c @@ -0,0 +1,41 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_os_init -- + * Initialize the OS layer. + */ +int +__wt_os_init(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ? + __wt_os_inmemory(session) : +#if defined(_MSC_VER) + __wt_os_win(session)); +#else + __wt_os_posix(session)); +#endif +} + +/* + * __wt_os_cleanup -- + * Clean up the OS layer. + */ +int +__wt_os_cleanup(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ? + __wt_os_inmemory_cleanup(session) : +#if defined(_MSC_VER) + __wt_os_win_cleanup(session)); +#else + __wt_os_posix_cleanup(session)); +#endif +} diff --git a/src/os_common/os_setvbuf.c b/src/os_common/os_setvbuf.c new file mode 100644 index 00000000000..d6107115eb3 --- /dev/null +++ b/src/os_common/os_setvbuf.c @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_stream_set_line_buffer -- + * Set line buffering on a stream. 
+ */ +void +__wt_stream_set_line_buffer(FILE *fp) +{ + /* + * This function exists because MSVC doesn't support buffer sizes of 0 + * to the setvbuf call. To avoid re-introducing the bug, we have helper + * functions and disallow calling setvbuf directly in WiredTiger code. + */ + (void)setvbuf(fp, NULL, _IOLBF, 1024); +} + +/* + * __wt_stream_set_no_buffer -- + * Turn off buffering on a stream. + */ +void +__wt_stream_set_no_buffer(FILE *fp) +{ + (void)setvbuf(fp, NULL, _IONBF, 0); +} diff --git a/src/os_common/os_strtouq.c b/src/os_common/os_strtouq.c new file mode 100644 index 00000000000..0ae604fc761 --- /dev/null +++ b/src/os_common/os_strtouq.c @@ -0,0 +1,25 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_strtouq -- + * Convert a string to an unsigned quad integer. + */ +uint64_t +__wt_strtouq(const char *nptr, char **endptr, int base) +{ +#if defined(HAVE_STRTOUQ) + return (strtouq(nptr, endptr, base)); +#else + WT_STATIC_ASSERT(sizeof(uint64_t) == sizeof(unsigned long long)); + + return (strtoull(nptr, endptr, base)); +#endif +} diff --git a/src/os_posix/os_abort.c b/src/os_posix/os_abort.c deleted file mode 100644 index 034eedcfbf8..00000000000 --- a/src/os_posix/os_abort.c +++ /dev/null @@ -1,27 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_abort -- - * Abort the process, dropping core. 
- */ -void -__wt_abort(WT_SESSION_IMPL *session) - WT_GCC_FUNC_ATTRIBUTE((noreturn)) -{ - __wt_errx(session, "aborting WiredTiger library"); - -#ifdef HAVE_DIAGNOSTIC - __wt_attach(session); -#endif - - abort(); - /* NOTREACHED */ -} diff --git a/src/os_posix/os_alloc.c b/src/os_posix/os_alloc.c deleted file mode 100644 index cfc7b80450e..00000000000 --- a/src/os_posix/os_alloc.c +++ /dev/null @@ -1,308 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * On systems with poor default allocators for allocations greater than 16 KB, - * we provide an option to use TCMalloc explicitly. - * This is important on Windows which does not have a builtin mechanism - * to replace C run-time memory management functions with alternatives. - */ -#ifdef HAVE_LIBTCMALLOC -#include - -#define calloc tc_calloc -#define malloc tc_malloc -#define realloc tc_realloc -#define posix_memalign tc_posix_memalign -#define free tc_free -#endif - -/* - * __wt_calloc -- - * ANSI calloc function. - */ -int -__wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp) -{ - void *p; - - /* - * Defensive: if our caller doesn't handle errors correctly, ensure a - * free won't fail. - */ - *(void **)retp = NULL; - - /* - * !!! - * This function MUST handle a NULL WT_SESSION_IMPL handle. - */ - WT_ASSERT(session, number != 0 && size != 0); - - if (session != NULL) - WT_STAT_FAST_CONN_INCR(session, memory_allocation); - - if ((p = calloc(number, size)) == NULL) - WT_RET_MSG(session, __wt_errno(), - "memory allocation of %" WT_SIZET_FMT " bytes failed", - size * number); - - *(void **)retp = p; - return (0); -} - -/* - * __wt_malloc -- - * ANSI malloc function. 
- */ -int -__wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp) -{ - void *p; - - /* - * Defensive: if our caller doesn't handle errors correctly, ensure a - * free won't fail. - */ - *(void **)retp = NULL; - - /* - * !!! - * This function MUST handle a NULL WT_SESSION_IMPL handle. - */ - WT_ASSERT(session, bytes_to_allocate != 0); - - if (session != NULL) - WT_STAT_FAST_CONN_INCR(session, memory_allocation); - - if ((p = malloc(bytes_to_allocate)) == NULL) - WT_RET_MSG(session, __wt_errno(), - "memory allocation of %" WT_SIZET_FMT " bytes failed", - bytes_to_allocate); - - *(void **)retp = p; - return (0); -} - -/* - * __realloc_func -- - * ANSI realloc function. - */ -static int -__realloc_func(WT_SESSION_IMPL *session, - size_t *bytes_allocated_ret, size_t bytes_to_allocate, bool clear_memory, - void *retp) -{ - void *p; - size_t bytes_allocated; - - /* - * !!! - * This function MUST handle a NULL WT_SESSION_IMPL handle. - * - * Sometimes we're allocating memory and we don't care about the - * final length -- bytes_allocated_ret may be NULL. - */ - p = *(void **)retp; - bytes_allocated = - (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret; - WT_ASSERT(session, - (p == NULL && bytes_allocated == 0) || - (p != NULL && - (bytes_allocated_ret == NULL || bytes_allocated != 0))); - WT_ASSERT(session, bytes_to_allocate != 0); - WT_ASSERT(session, bytes_allocated < bytes_to_allocate); - - if (session != NULL) { - if (p == NULL) - WT_STAT_FAST_CONN_INCR(session, memory_allocation); - else - WT_STAT_FAST_CONN_INCR(session, memory_grow); - } - - if ((p = realloc(p, bytes_to_allocate)) == NULL) - WT_RET_MSG(session, __wt_errno(), - "memory allocation of %" WT_SIZET_FMT " bytes failed", - bytes_to_allocate); - - /* - * Clear the allocated memory, parts of WiredTiger depend on allocated - * memory being cleared. 
- */ - if (clear_memory) - memset((uint8_t *)p + bytes_allocated, - 0, bytes_to_allocate - bytes_allocated); - - /* Update caller's bytes allocated value. */ - if (bytes_allocated_ret != NULL) - *bytes_allocated_ret = bytes_to_allocate; - - *(void **)retp = p; - return (0); -} - -/* - * __wt_realloc -- - * WiredTiger's realloc API. - */ -int -__wt_realloc(WT_SESSION_IMPL *session, - size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) -{ - return (__realloc_func( - session, bytes_allocated_ret, bytes_to_allocate, true, retp)); -} - -/* - * __wt_realloc_noclear -- - * WiredTiger's realloc API, not clearing allocated memory. - */ -int -__wt_realloc_noclear(WT_SESSION_IMPL *session, - size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) -{ - return (__realloc_func( - session, bytes_allocated_ret, bytes_to_allocate, false, retp)); -} - -/* - * __wt_realloc_aligned -- - * ANSI realloc function that aligns to buffer boundaries, configured with - * the "buffer_alignment" key to wiredtiger_open. - */ -int -__wt_realloc_aligned(WT_SESSION_IMPL *session, - size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) -{ -#if defined(HAVE_POSIX_MEMALIGN) - WT_DECL_RET; - - /* - * !!! - * This function MUST handle a NULL WT_SESSION_IMPL handle. - */ - if (session != NULL && S2C(session)->buffer_alignment > 0) { - void *p, *newp; - size_t bytes_allocated; - - /* - * Sometimes we're allocating memory and we don't care about the - * final length -- bytes_allocated_ret may be NULL. - */ - p = *(void **)retp; - bytes_allocated = - (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret; - WT_ASSERT(session, - (p == NULL && bytes_allocated == 0) || - (p != NULL && - (bytes_allocated_ret == NULL || bytes_allocated != 0))); - WT_ASSERT(session, bytes_to_allocate != 0); - WT_ASSERT(session, bytes_allocated < bytes_to_allocate); - - /* - * We are going to allocate an aligned buffer. 
When we do this - * repeatedly, the allocator is expected to start on a boundary - * each time, account for that additional space by never asking - * for less than a full alignment size. The primary use case - * for aligned buffers is Linux direct I/O, which requires that - * the size be a multiple of the alignment anyway. - */ - bytes_to_allocate = - WT_ALIGN(bytes_to_allocate, S2C(session)->buffer_alignment); - - WT_STAT_FAST_CONN_INCR(session, memory_allocation); - - if ((ret = posix_memalign(&newp, - S2C(session)->buffer_alignment, - bytes_to_allocate)) != 0) - WT_RET_MSG(session, ret, - "memory allocation of %" WT_SIZET_FMT - " bytes failed", bytes_to_allocate); - - if (p != NULL) - memcpy(newp, p, bytes_allocated); - __wt_free(session, p); - p = newp; - - /* Update caller's bytes allocated value. */ - if (bytes_allocated_ret != NULL) - *bytes_allocated_ret = bytes_to_allocate; - - *(void **)retp = p; - return (0); - } -#endif - /* - * If there is no posix_memalign function, or no alignment configured, - * fall back to realloc. - * - * Windows note: Visual C CRT memalign does not match POSIX behavior - * and would also double each allocation so it is bad for memory use. - */ - return (__realloc_func( - session, bytes_allocated_ret, bytes_to_allocate, false, retp)); -} - -/* - * __wt_strndup -- - * Duplicate a byte string of a given length (and NUL-terminate). - */ -int -__wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) -{ - void *p; - - if (str == NULL) { - *(void **)retp = NULL; - return (0); - } - - WT_RET(__wt_malloc(session, len + 1, &p)); - - /* - * Don't change this to strncpy, we rely on this function to duplicate - * "strings" that contain nul bytes. - */ - memcpy(p, str, len); - ((uint8_t *)p)[len] = '\0'; - - *(void **)retp = p; - return (0); -} - -/* - * __wt_free_int -- - * ANSI free function. 
- */ -void -__wt_free_int(WT_SESSION_IMPL *session, const void *p_arg) -{ - void *p; - - p = *(void **)p_arg; - if (p == NULL) /* ANSI C free semantics */ - return; - - /* - * If there's a serialization bug we might race with another thread. - * We can't avoid the race (and we aren't willing to flush memory), - * but we minimize the window by clearing the free address, hoping a - * racing thread will see, and won't free, a NULL pointer. - */ - *(void **)p_arg = NULL; - - /* - * !!! - * This function MUST handle a NULL WT_SESSION_IMPL handle. - */ - if (session != NULL) - WT_STAT_FAST_CONN_INCR(session, memory_free); - - free(p); -} diff --git a/src/os_posix/os_fs_posix.c b/src/os_posix/os_fs_posix.c new file mode 100644 index 00000000000..2d450b1df7f --- /dev/null +++ b/src/os_posix/os_fs_posix.c @@ -0,0 +1,729 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __posix_sync -- + * Underlying support function to flush a file handle. + */ +static int +__posix_sync(WT_SESSION_IMPL *session, + int fd, const char *name, const char *func, bool block) +{ + WT_DECL_RET; + +#ifdef HAVE_SYNC_FILE_RANGE + if (!block) { + WT_SYSCALL_RETRY(sync_file_range(fd, + (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func); + } +#else + /* + * Callers attempting asynchronous flush handle ENOTSUP returns, and + * won't make further attempts. + */ + if (!block) + return (ENOTSUP); +#endif + +#if defined(F_FULLFSYNC) + /* + * OS X fsync documentation: + * "Note that while fsync() will flush all data from the host to the + * drive (i.e. the "permanent storage device"), the drive itself may + * not physically write the data to the platters for quite some time + * and it may be written in an out-of-order sequence. 
For applications + * that require tighter guarantees about the integrity of their data, + * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks + * the drive to flush all buffered data to permanent storage." + * + * OS X F_FULLFSYNC fcntl documentation: + * "This is currently implemented on HFS, MS-DOS (FAT), and Universal + * Disk Format (UDF) file systems." + */ + WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); + if (ret == 0) + return (0); + /* + * Assume F_FULLFSYNC failed because the file system doesn't support it + * and fallback to fsync. + */ +#endif +#if defined(HAVE_FDATASYNC) + WT_SYSCALL_RETRY(fdatasync(fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: fdatasync", name, func); +#else + WT_SYSCALL_RETRY(fsync(fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: fsync", name, func); +#endif +} + +/* + * __posix_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static int +__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ +#ifdef __linux__ + WT_DECL_RET; + int fd, tret; + const char *dir; + char *copy; + + /* + * POSIX 1003.1 does not require that fsync of a file handle ensures the + * entry in the directory containing the file has also reached disk (and + * there are historic Linux filesystems requiring this), do an explicit + * fsync on a file descriptor for the directory to be sure. + */ + copy = NULL; + if (path == NULL || (dir = strrchr(path, '/')) == NULL) + path = S2C(session)->home; + else { + /* + * Copy the directory name, leaving the trailing slash in place, + * so a path of "/foo" doesn't result in an empty string. + */ + WT_RET(__wt_strndup( + session, path, (size_t)(dir - path) + 1, &copy)); + path = copy; + } + + WT_SYSCALL_RETRY(( + (fd = open(path, O_RDONLY, 0444)) == -1 ? 
1 : 0), ret); + if (ret != 0) + WT_RET_MSG(session, ret, "%s: directory-sync: open", path); + + ret = __posix_sync(session, fd, path, "directory-sync", true); + + WT_SYSCALL_RETRY(close(fd), tret); + if (tret != 0) + __wt_err(session, tret, "%s: directory-sync: close", path); + return (ret == 0 ? tret : ret); +#else + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +#endif +} + +/* + * __posix_file_exist -- + * Return if the file exists. + */ +static int +__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + struct stat sb; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + WT_SYSCALL_RETRY(stat(name, &sb), ret); + if (ret == 0) + *existp = true; + else if (ret == ENOENT) { + *existp = false; + ret = 0; + } else + __wt_err(session, ret, "%s: file-exist: stat", name); + + __wt_free(session, path); + return (ret); +} + +/* + * __posix_file_remove -- + * Remove a file. + */ +static int +__posix_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + char *path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, name, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + WT_SYSCALL_RETRY(remove(name), ret); + if (ret != 0) + __wt_err(session, ret, "%s: file-remove: remove", name); + + __wt_free(session, path); + return (ret); +} + +/* + * __posix_file_rename -- + * Rename a file. 
+ */ +static int +__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + char *from_path, *to_path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, from, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); + if (__wt_handle_search(session, to, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + from = from_path; + WT_ERR(__wt_filename(session, to, &to_path)); + to = to_path; + + WT_SYSCALL_RETRY(rename(from, to), ret); + if (ret != 0) + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); +} + +/* + * __posix_file_size -- + * Get the size of a file in bytes, by file name. + */ +static int +__posix_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + /* + * Optionally don't log errors on ENOENT; some callers of this function + * expect failure in that case and don't want an error message logged. + */ + WT_SYSCALL_RETRY(stat(name, &sb), ret); + if (ret == 0) + *sizep = sb.st_size; + else if (ret != ENOENT || !silent) + __wt_err(session, ret, "%s: file-size: stat", name); + + __wt_free(session, path); + + return (ret); +} + +/* + * __posix_handle_advise -- + * POSIX fadvise. + */ +static int +__posix_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ +#if defined(HAVE_POSIX_FADVISE) + WT_DECL_RET; + + /* + * Refuse pre-load when direct I/O is configured for the file, the + * kernel cache isn't interesting. 
+ */ + if (advice == POSIX_MADV_WILLNEED && fh->direct_io) + return (ENOTSUP); + + WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); + if (ret == 0) + return (0); + + /* + * Treat EINVAL as not-supported, some systems don't support some flags. + * Quietly fail, callers expect not-supported failures. + */ + if (ret == EINVAL) + return (ENOTSUP); + + WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name); +#else + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + + /* Quietly fail, callers expect not-supported failures. */ + return (ENOTSUP); +#endif +} + +/* + * __posix_handle_close -- + * ANSI C close/fclose. + */ +static int +__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_DECL_RET; + int tret; + + if (fh->fp == NULL) { + WT_SYSCALL_RETRY(close(fh->fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name); + } + + /* If the handle was opened for writing, flush the file. */ + if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, ret, "%s: handle-close: fflush", fh->name); + } + + if ((tret = fclose(fh->fp)) != 0) { + tret = __wt_errno(); + __wt_err(session, tret, "%s: handle-close: fclose", fh->name); + } + return (ret == 0 ? tret : ret); +} + +/* + * __posix_handle_getc -- + * ANSI C fgetc. + */ +static int +__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, + ENOTSUP, "%s: handle-getc: no stream configured", fh->name); + + *chp = fgetc(fh->fp); + if (*chp != EOF || !ferror(fh->fp)) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); +} + +/* + * __posix_handle_lock -- + * Lock/unlock a file. 
+ */ +static int +__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + struct flock fl; + WT_DECL_RET; + + /* + * WiredTiger requires this function be able to acquire locks past + * the end of file. + * + * Note we're using fcntl(2) locking: all fcntl locks associated with a + * file for a given process are removed when any file descriptor for the + * file is closed by the process, even if a lock was never requested for + * that file descriptor. + */ + fl.l_start = 0; + fl.l_len = 1; + fl.l_type = lock ? F_WRLCK : F_UNLCK; + fl.l_whence = SEEK_SET; + + WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name); +} + +/* + * __posix_handle_printf -- + * ANSI C vfprintf. + */ +static int +__posix_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, ENOTSUP, + "%s: vfprintf: no stream configured", fh->name); + + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); +} + +/* + * __posix_handle_read -- + * POSIX pread. + */ +static int +__posix_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + size_t chunk; + ssize_t nr; + uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) + WT_RET_MSG(session, nr == 0 ? 
WT_ERROR : __wt_errno(), + "%s: handle-read: pread: failed to read %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __posix_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + + WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); + if (ret == 0) { + *sizep = sb.st_size; + return (0); + } + WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name); +} + +/* + * __posix_handle_sync -- + * POSIX fflush/fsync. + */ +static int +__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + if (fh->fp == NULL) + return (__posix_sync( + session, fh->fd, fh->name, "handle-sync", block)); + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); +} + +/* + * __posix_handle_truncate -- + * POSIX ftruncate. + */ +static int +__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + + WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name); +} + +/* + * __posix_handle_write -- + * POSIX pwrite. + */ +static int +__posix_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + size_t chunk; + ssize_t nw; + const uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break writes larger than 1GB into 1GB chunks. 
*/ + for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) + WT_RET_MSG(session, __wt_errno(), + "%s: handle-write: pwrite: failed to write %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __posix_handle_open_cloexec -- + * Prevent child access to file handles. + */ +static inline int +__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) +{ +#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) + int f; + + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. There's an obvious race + * between the open and this call, prefer the flag to open if available. + */ + if ((f = fcntl(fd, F_GETFD)) == -1 || + fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) + WT_RET_MSG(session, __wt_errno(), + "%s: handle-open: fcntl", name); + return (0); +#else + WT_UNUSED(session); + WT_UNUSED(fd); + WT_UNUSED(name); + return (0); +#endif +} + +/* + * __posix_handle_open -- + * Open a file handle. + */ +static int +__posix_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + mode_t mode; + int f, fd, tret; + bool direct_io; + const char *stream_mode; + + conn = S2C(session); + direct_io = false; + + /* Set up error handling. */ + fh->fd = fd = -1; + fh->fp = NULL; + + if (file_type == WT_FILE_TYPE_DIRECTORY) { + f = O_RDONLY; +#ifdef O_CLOEXEC + /* + * Security: + * The application may spawn a new process, and we don't want + * another process to have access to our file handles. + */ + f |= O_CLOEXEC; +#endif + WT_SYSCALL_RETRY(( + (fd = open(name, f, 0444)) == -1 ? 
1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, "%s: handle-open: open", name); + WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + goto directory_open; + } + + f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR; + if (LF_ISSET(WT_OPEN_CREATE)) { + f |= O_CREAT; + if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + f |= O_EXCL; + mode = 0666; + } else + mode = 0; + +#ifdef O_BINARY + /* Windows clones: we always want to treat the file as a binary. */ + f |= O_BINARY; +#endif +#ifdef O_CLOEXEC + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. + */ + f |= O_CLOEXEC; +#endif +#ifdef O_DIRECT + /* + * Direct I/O: file-type is a flag from the set of possible flags stored + * in the connection handle during configuration, check for a match. + * Also, "direct_io=checkpoint" configures direct I/O for readonly data + * files. + */ + if (FLD_ISSET(conn->direct_io, file_type) || + (LF_ISSET(WT_OPEN_READONLY) && + file_type == WT_FILE_TYPE_DATA && + FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { + f |= O_DIRECT; + direct_io = true; + } +#endif + fh->direct_io = direct_io; +#ifdef O_NOATIME + /* Avoid updating metadata for read-only workloads. */ + if (file_type == WT_FILE_TYPE_DATA) + f |= O_NOATIME; +#endif + + if (file_type == WT_FILE_TYPE_LOG && + FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { +#ifdef O_DSYNC + f |= O_DSYNC; +#elif defined(O_SYNC) + f |= O_SYNC; +#else + WT_ERR_MSG(session, ENOTSUP, + "unsupported log sync mode configured"); +#endif + } + + WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, + direct_io ? + "%s: handle-open: open: failed with direct I/O configured, " + "some filesystem types do not support direct I/O" : + "%s: handle-open: open", name); + WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + + /* Disable read-ahead on trees: it slows down random read workloads. 
*/ +#if defined(HAVE_POSIX_FADVISE) + if (file_type == WT_FILE_TYPE_DATA) { + WT_SYSCALL_RETRY( + posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, + "%s: handle-open: posix_fadvise", name); + } +#endif + + /* Optionally configure a stdio stream API. */ + switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { + case WT_STREAM_APPEND: + stream_mode = "a"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case WT_STREAM_READ: + stream_mode = "r"; + break; + case WT_STREAM_WRITE: + stream_mode = "w"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case 0: + default: + stream_mode = NULL; + break; + } + if (stream_mode != NULL) { + if ((fh->fp = fdopen(fd, stream_mode)) == NULL) + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fdopen", name); + if (LF_ISSET(WT_STREAM_LINE_BUFFER)) + __wt_stream_set_line_buffer(fh->fp); + } + +directory_open: + fh->fd = fd; + + /* Configure fallocate calls. */ + __wt_posix_handle_allocate_configure(session, fh); + + fh->fh_advise = __posix_handle_advise; + fh->fh_allocate = __wt_posix_handle_allocate; + fh->fh_close = __posix_handle_close; + fh->fh_getc = __posix_handle_getc; + fh->fh_lock = __posix_handle_lock; + fh->fh_map = __wt_posix_map; + fh->fh_map_discard = __wt_posix_map_discard; + fh->fh_map_preload = __wt_posix_map_preload; + fh->fh_map_unmap = __wt_posix_map_unmap; + fh->fh_printf = __posix_handle_printf; + fh->fh_read = __posix_handle_read; + fh->fh_size = __posix_handle_size; + fh->fh_sync = __posix_handle_sync; + fh->fh_truncate = __posix_handle_truncate; + fh->fh_write = __posix_handle_write; + + return (0); + +err: if (fd != -1) { + WT_SYSCALL_RETRY(close(fd), tret); + if (tret != 0) + __wt_err(session, tret, "%s: handle-open: close", name); + } + return (ret); +} + +/* + * __wt_os_posix -- + * Initialize a POSIX configuration. 
+ */ +int +__wt_os_posix(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + /* Initialize the POSIX jump table. */ + conn->file_directory_list = __wt_posix_directory_list; + conn->file_directory_sync = __posix_directory_sync; + conn->file_exist = __posix_file_exist; + conn->file_remove = __posix_file_remove; + conn->file_rename = __posix_file_rename; + conn->file_size = __posix_file_size; + conn->handle_open = __posix_handle_open; + + return (0); +} + +/* + * __wt_os_posix_cleanup -- + * Discard a POSIX configuration. + */ +int +__wt_os_posix_cleanup(WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (0); +} diff --git a/src/os_posix/os_getline.c b/src/os_posix/os_getline.c deleted file mode 100644 index 01e11581edf..00000000000 --- a/src/os_posix/os_getline.c +++ /dev/null @@ -1,51 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_getline -- - * Get a line from a stream. - * - * Implementation of the POSIX getline or BSD fgetln functions (finding the - * function in a portable way is hard, it's simple enough to write it instead). - * - * Note: Unlike the standard getline calls, this function doesn't include the - * trailing newline character in the returned buffer and discards empty lines - * (so the caller's EOF marker is a returned line length of 0). - */ -int -__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh) -{ - int c; - - /* - * We always NUL-terminate the returned string (even if it's empty), - * make sure there's buffer space for a trailing NUL in all cases. - */ - WT_RET(__wt_buf_init(session, buf, 100)); - - for (;;) { - WT_RET(fh->fh_getc(session, fh, &c)); - if (c == EOF) - break; - - /* Leave space for a trailing NUL. 
*/ - WT_RET(__wt_buf_extend(session, buf, buf->size + 2)); - if (c == '\n') { - if (buf->size == 0) - continue; - break; - } - ((char *)buf->mem)[buf->size++] = (char)c; - } - - ((char *)buf->mem)[buf->size] = '\0'; - - return (0); -} diff --git a/src/os_posix/os_getopt.c b/src/os_posix/os_getopt.c deleted file mode 100644 index 0306ad1d79d..00000000000 --- a/src/os_posix/os_getopt.c +++ /dev/null @@ -1,151 +0,0 @@ -/*- - * Public Domain 2014-2016 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* $NetBSD: getopt.c,v 1.26 2003/08/07 16:43:40 agc Exp $ */ - -/* - * Copyright (c) 1987, 1993, 1994 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "wt_internal.h" - -extern int __wt_opterr, __wt_optind, __wt_optopt, __wt_optreset; -int __wt_opterr = 1, /* if error message should be printed */ - __wt_optind = 1, /* index into parent argv vector */ - __wt_optopt, /* character checked for validity */ - __wt_optreset; /* reset getopt */ - -extern char *__wt_optarg; -char *__wt_optarg; /* argument associated with option */ - -#define BADCH (int)'?' 
-#define BADARG (int)':' -#define EMSG "" - -/* - * __wt_getopt -- - * Parse argc/argv argument vector. - */ -int -__wt_getopt( - const char *progname, int nargc, char * const *nargv, const char *ostr) -{ - static const char *place = EMSG; /* option letter processing */ - const char *oli; /* option letter list index */ - - if (__wt_optreset || *place == 0) { /* update scanning pointer */ - __wt_optreset = 0; - place = nargv[__wt_optind]; - if (__wt_optind >= nargc || *place++ != '-') { - /* Argument is absent or is not an option */ - place = EMSG; - return (-1); - } - __wt_optopt = *place++; - if (__wt_optopt == '-' && *place == 0) { - /* "--" => end of options */ - ++__wt_optind; - place = EMSG; - return (-1); - } - if (__wt_optopt == 0) { - /* Solitary '-', treat as a '-' option - if the program (eg su) is looking for it. */ - place = EMSG; - if (strchr(ostr, '-') == NULL) - return (-1); - __wt_optopt = '-'; - } - } else - __wt_optopt = *place++; - - /* See if option letter is one the caller wanted... */ - if (__wt_optopt == ':' || (oli = strchr(ostr, __wt_optopt)) == NULL) { - if (*place == 0) - ++__wt_optind; - if (__wt_opterr && *ostr != ':') - (void)fprintf(stderr, - "%s: illegal option -- %c\n", progname, - __wt_optopt); - return (BADCH); - } - - /* Does this option need an argument? */ - if (oli[1] != ':') { - /* don't need argument */ - __wt_optarg = NULL; - if (*place == 0) - ++__wt_optind; - } else { - /* Option-argument is either the rest of this argument or the - entire next argument. 
*/ - if (*place) - __wt_optarg = (char *)place; - else if (nargc > ++__wt_optind) - __wt_optarg = nargv[__wt_optind]; - else { - /* option-argument absent */ - place = EMSG; - if (*ostr == ':') - return (BADARG); - if (__wt_opterr) - (void)fprintf(stderr, - "%s: option requires an argument -- %c\n", - progname, __wt_optopt); - return (BADCH); - } - place = EMSG; - ++__wt_optind; - } - return (__wt_optopt); /* return option letter */ -} diff --git a/src/os_posix/os_init.c b/src/os_posix/os_init.c deleted file mode 100644 index 512216c52a5..00000000000 --- a/src/os_posix/os_init.c +++ /dev/null @@ -1,41 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_os_init -- - * Initialize the OS layer. - */ -int -__wt_os_init(WT_SESSION_IMPL *session) -{ - return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ? - __wt_os_inmemory(session) : -#if defined(_MSC_VER) - __wt_os_win(session)); -#else - __wt_os_posix(session)); -#endif -} - -/* - * __wt_os_cleanup -- - * Clean up the OS layer. - */ -int -__wt_os_cleanup(WT_SESSION_IMPL *session) -{ - return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ? - __wt_os_inmemory_cleanup(session) : -#if defined(_MSC_VER) - __wt_os_win_cleanup(session)); -#else - __wt_os_posix_cleanup(session)); -#endif -} diff --git a/src/os_posix/os_inmemory.c b/src/os_posix/os_inmemory.c deleted file mode 100644 index e79054e56ed..00000000000 --- a/src/os_posix/os_inmemory.c +++ /dev/null @@ -1,466 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * In-memory information. 
- */ -typedef struct { - WT_SPINLOCK lock; -} WT_IM; - -/* - * __im_directory_list -- - * Get a list of files from a directory, in-memory version. - */ -static int -__im_directory_list(WT_SESSION_IMPL *session, const char *dir, - const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) -{ - WT_UNUSED(session); - WT_UNUSED(dir); - WT_UNUSED(prefix); - WT_UNUSED(flags); - WT_UNUSED(dirlist); - WT_UNUSED(countp); - - WT_RET_MSG(session, ENOTSUP, "directory-list"); -} - -/* - * __im_directory_sync -- - * Flush a directory to ensure file creation is durable. - */ -static int -__im_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -} - -/* - * __im_file_exist -- - * Return if the file exists. - */ -static int -__im_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) -{ - *existp = __wt_handle_search(session, name, false, true, NULL, NULL); - return (0); -} - -/* - * __im_file_remove -- - * POSIX remove. - */ -static int -__im_file_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - WT_FH *fh; - - if (__wt_handle_search(session, name, true, true, NULL, &fh)) { - WT_ASSERT(session, fh->ref == 1); - - /* Force a discard of the handle. */ - F_CLR(fh, WT_FH_IN_MEMORY); - ret = __wt_close(session, &fh); - } - return (ret); -} - -/* - * __im_file_rename -- - * POSIX rename. - */ -static int -__im_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh; - uint64_t bucket, hash; - char *to_name; - - conn = S2C(session); - - /* We'll need a copy of the target name. */ - WT_RET(__wt_strdup(session, to, &to_name)); - - __wt_spin_lock(session, &conn->fh_lock); - - /* Make sure the target name isn't active. 
*/ - hash = __wt_hash_city64(to, strlen(to)); - bucket = hash % WT_HASH_ARRAY_SIZE; - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(to, fh->name) == 0) - WT_ERR(EPERM); - - /* Find the source name. */ - hash = __wt_hash_city64(from, strlen(from)); - bucket = hash % WT_HASH_ARRAY_SIZE; - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(from, fh->name) == 0) - break; - if (fh == NULL) - WT_ERR(ENOENT); - - /* Remove source from the list. */ - WT_CONN_FILE_REMOVE(conn, fh, bucket); - - /* Swap the names. */ - __wt_free(session, fh->name); - fh->name = to_name; - to_name = NULL; - - /* Put source back on the list. */ - hash = __wt_hash_city64(to, strlen(to)); - bucket = hash % WT_HASH_ARRAY_SIZE; - WT_CONN_FILE_INSERT(conn, fh, bucket); - - if (0) { -err: __wt_free(session, to_name); - } - __wt_spin_unlock(session, &conn->fh_lock); - - return (ret); -} - -/* - * __im_file_size -- - * Get the size of a file in bytes, by file name. - */ -static int -__im_file_size( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) -{ - WT_DECL_RET; - WT_FH *fh; - WT_IM *im; - - WT_UNUSED(silent); - - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); - - if (__wt_handle_search(session, name, false, false, NULL, &fh)) { - *sizep = (wt_off_t)fh->buf.size; - __wt_handle_search_unlock(session); - } else - ret = ENOENT; - - __wt_spin_unlock(session, &im->lock); - return (ret); -} - -/* - * __im_handle_advise -- - * POSIX fadvise. - */ -static int -__im_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - return (ENOTSUP); -} - -/* - * __im_handle_close -- - * ANSI C close/fclose. - */ -static int -__im_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) -{ - __wt_buf_free(session, &fh->buf); - - return (0); -} - -/* - * __im_handle_getc -- - * ANSI C fgetc. 
- */ -static int -__im_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) -{ - WT_IM *im; - - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); - - if (fh->off >= fh->buf.size) - *chp = EOF; - else - *chp = ((char *)fh->buf.data)[fh->off++]; - - __wt_spin_unlock(session, &im->lock); - return (0); -} - -/* - * __im_handle_lock -- - * Lock/unlock a file. - */ -static int -__im_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(lock); - return (0); -} - -/* - * __im_handle_printf -- - * ANSI C vfprintf. - */ -static int -__im_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - va_list ap_copy; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_IM *im; - size_t len; - - im = S2C(session)->inmemory; - - /* Build the string we're writing. */ - WT_RET(__wt_scr_alloc(session, strlen(fmt) * 2 + 128, &tmp)); - for (;;) { - va_copy(ap_copy, ap); - len = (size_t)vsnprintf(tmp->mem, tmp->memsize, fmt, ap_copy); - if (len < tmp->memsize) { - tmp->data = tmp->mem; - tmp->size = len; - break; - } - WT_ERR(__wt_buf_extend(session, tmp, len + 1)); - } - - __wt_spin_lock(session, &im->lock); - - /* Grow the handle's buffer as necessary. */ - WT_ERR(__wt_buf_grow(session, &fh->buf, fh->off + len)); - - /* Copy the data into place and update the offset. */ - memcpy((uint8_t *)fh->buf.mem + fh->off, tmp->data, len); - fh->off += len; - -err: __wt_spin_unlock(session, &im->lock); - - __wt_scr_free(session, &tmp); - return (ret); -} - -/* - * __im_handle_read -- - * POSIX pread. 
- */ -static int -__im_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - WT_DECL_RET; - WT_IM *im; - size_t off; - - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); - - off = (size_t)offset; - if (off < fh->buf.size) { - len = WT_MIN(len, fh->buf.size - off); - memcpy(buf, (uint8_t *)fh->buf.mem + off, len); - fh->off = off + len; - } else - ret = WT_ERROR; - - __wt_spin_unlock(session, &im->lock); - if (ret == 0) - return (0); - WT_RET_MSG(session, WT_ERROR, - "%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at " - "offset %" WT_SIZET_FMT, - fh->name, len, off); -} - -/* - * __im_handle_size -- - * Get the size of a file in bytes, by file handle. - */ -static int -__im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - WT_UNUSED(session); - - *sizep = (wt_off_t)fh->buf.size; - return (0); -} - -/* - * __im_handle_sync -- - * POSIX fflush/fsync. - */ -static int -__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - - /* - * Callers attempting asynchronous flush handle ENOTSUP returns, and - * won't make further attempts. - */ - return (block ? 0 : ENOTSUP); -} - -/* - * __im_handle_truncate -- - * POSIX ftruncate. - */ -static int -__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - WT_IM *im; - - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); - - WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)len)); - memset((uint8_t *) - fh->buf.mem + fh->buf.size, 0, fh->buf.memsize - fh->buf.size); - -err: __wt_spin_unlock(session, &im->lock); - return (ret); -} - -/* - * __im_handle_write -- - * POSIX pwrite. 
- */ -static int -__im_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - WT_DECL_RET; - WT_IM *im; - size_t off; - - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); - - off = (size_t)offset; - WT_ERR(__wt_buf_grow(session, &fh->buf, off + len + 1024)); - - memcpy((uint8_t *)fh->buf.data + off, buf, len); - if (off + len > fh->buf.size) - fh->buf.size = off + len; - fh->off = off + len; - -err: __wt_spin_unlock(session, &im->lock); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, - "%s: handle-write: failed to write %" WT_SIZET_FMT " bytes at " - "offset %" WT_SIZET_FMT, - fh->name, len, off); -} - -/* - * __im_handle_open -- - * POSIX fopen/open. - */ -static int -__im_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *path, uint32_t file_type, uint32_t flags) -{ - WT_UNUSED(session); - WT_UNUSED(path); - WT_UNUSED(file_type); - WT_UNUSED(flags); - - fh->off = 0; - F_SET(fh, WT_FH_IN_MEMORY); - - fh->fh_advise = __im_handle_advise; - fh->fh_close = __im_handle_close; - fh->fh_getc = __im_handle_getc; - fh->fh_lock = __im_handle_lock; - fh->fh_printf = __im_handle_printf; - fh->fh_read = __im_handle_read; - fh->fh_size = __im_handle_size; - fh->fh_sync = __im_handle_sync; - fh->fh_truncate = __im_handle_truncate; - fh->fh_write = __im_handle_write; - - return (0); -} - -/* - * __wt_os_inmemory -- - * Initialize an in-memory configuration. - */ -int -__wt_os_inmemory(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_IM *im; - - conn = S2C(session); - im = NULL; - - /* Initialize the in-memory jump table. */ - conn->file_directory_list = __im_directory_list; - conn->file_directory_sync = __im_directory_sync; - conn->file_exist = __im_file_exist; - conn->file_remove = __im_file_remove; - conn->file_rename = __im_file_rename; - conn->file_size = __im_file_size; - conn->handle_open = __im_handle_open; - - /* Allocate an in-memory structure. 
*/ - WT_RET(__wt_calloc_one(session, &im)); - WT_ERR(__wt_spin_init(session, &im->lock, "in-memory I/O")); - conn->inmemory = im; - - return (0); - -err: __wt_free(session, im); - return (ret); -} - -/* - * __wt_os_inmemory_cleanup -- - * Discard an in-memory configuration. - */ -int -__wt_os_inmemory_cleanup(WT_SESSION_IMPL *session) -{ - WT_DECL_RET; - WT_IM *im; - - if ((im = S2C(session)->inmemory) == NULL) - return (0); - S2C(session)->inmemory = NULL; - - __wt_spin_destroy(session, &im->lock); - __wt_free(session, im); - - return (ret); -} diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c deleted file mode 100644 index 9013d67d22b..00000000000 --- a/src/os_posix/os_open.c +++ /dev/null @@ -1,318 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_handle_search -- - * Search for a matching handle. - */ -bool -__wt_handle_search(WT_SESSION_IMPL *session, const char *name, - bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp) -{ - WT_CONNECTION_IMPL *conn; - WT_FH *fh; - uint64_t bucket, hash; - bool found; - - if (fhp != NULL) - *fhp = NULL; - - conn = S2C(session); - found = false; - - hash = __wt_hash_city64(name, strlen(name)); - bucket = hash % WT_HASH_ARRAY_SIZE; - - __wt_spin_lock(session, &conn->fh_lock); - - /* - * If we already have the file open, optionally increment the reference - * count and return a pointer. - */ - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(name, fh->name) == 0) { - if (increment_ref) - ++fh->ref; - if (fhp != NULL) - *fhp = fh; - found = true; - break; - } - - /* If we don't find a match, optionally add a new entry. 
*/ - if (!found && newfh != NULL) { - newfh->name_hash = hash; - WT_CONN_FILE_INSERT(conn, newfh, bucket); - (void)__wt_atomic_add32(&conn->open_file_count, 1); - - if (increment_ref) - ++newfh->ref; - if (fhp != NULL) - *fhp = newfh; - } - - /* - * Our caller may be operating on the handle itself, optionally leave - * the list locked. - */ - if (unlock) - __wt_spin_unlock(session, &conn->fh_lock); - - return (found); -} - -/* - * __wt_handle_search_unlock -- - * Release handle lock. - */ -void -__wt_handle_search_unlock(WT_SESSION_IMPL *session) -{ - __wt_spin_unlock(session, &S2C(session)->fh_lock); -} - -/* - * __open_verbose -- - * Optionally output a verbose message on handle open. - */ -static inline int -__open_verbose(WT_SESSION_IMPL *session, - const char *name, uint32_t file_type, uint32_t flags) -{ -#ifdef HAVE_VERBOSE - if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS)) - return (0); - - /* - * It's useful to track file opens when debugging platforms, take some - * effort to output good tracking information. 
- */ - WT_DECL_RET; - WT_DECL_ITEM(tmp); - const char *file_type_tag, *sep; - - switch (file_type) { - case WT_FILE_TYPE_CHECKPOINT: - file_type_tag = "checkpoint"; - break; - case WT_FILE_TYPE_DATA: - file_type_tag = "data"; - break; - case WT_FILE_TYPE_DIRECTORY: - file_type_tag = "directory"; - break; - case WT_FILE_TYPE_LOG: - file_type_tag = "log"; - break; - case WT_FILE_TYPE_REGULAR: - file_type_tag = "regular"; - break; - default: - file_type_tag = "unknown open type"; - break; - } - - sep = ""; - WT_RET(__wt_scr_alloc(session, 0, &tmp)); - -#define WT_OPEN_VERBOSE_FLAG(f, name) \ - if (LF_ISSET(f)) { \ - WT_ERR(__wt_buf_catfmt( \ - session, tmp, "%s%s", sep, name)); \ - sep = ","; \ - } - - WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly"); - WT_OPEN_VERBOSE_FLAG(WT_STREAM_APPEND, "stream-append"); - WT_OPEN_VERBOSE_FLAG(WT_STREAM_READ, "stream-read"); - WT_OPEN_VERBOSE_FLAG(WT_STREAM_WRITE, "stream-write"); - - ret = __wt_verbose(session, WT_VERB_FILEOPS, - "%s: handle-open: type %s, flags %s", - name, file_type_tag, (char *)tmp->data); - -err: __wt_scr_free(session, &tmp); - return (ret); -#else - WT_UNUSED(session); - WT_UNUSED(name); - WT_UNUSED(file_type); - WT_UNUSED(flags); - return (0); -#endif -} - -/* - * __wt_open -- - * Open a file handle. - */ -int -__wt_open(WT_SESSION_IMPL *session, - const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh; - bool lock_file, open_called; - char *path; - - WT_ASSERT(session, file_type != 0); /* A file type is required. */ - - conn = S2C(session); - fh = NULL; - open_called = false; - path = NULL; - - WT_RET(__open_verbose(session, name, file_type, flags)); - - /* Check if the handle is already open. 
*/ - if (__wt_handle_search(session, name, true, true, NULL, &fh)) { - /* - * XXX - * The in-memory implementation has to reset the file offset - * when a file is re-opened (which obviously also depends on - * in-memory configurations never opening a file in more than - * one thread at a time). This needs to be fixed. - */ - if (F_ISSET(fh, WT_FH_IN_MEMORY) && fh->ref == 1) - fh->off = 0; - *fhp = fh; - return (0); - } - - /* Allocate a structure and set the name. */ - WT_ERR(__wt_calloc_one(session, &fh)); - WT_ERR(__wt_strdup(session, name, &fh->name)); - - /* - * If this is a read-only connection, open all files read-only except - * the lock file. - * - * The only file created in read-only mode is the lock file. - */ - if (F_ISSET(conn, WT_CONN_READONLY)) { - lock_file = strcmp(name, WT_SINGLETHREAD) == 0; - if (!lock_file) - LF_SET(WT_OPEN_READONLY); - WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE)); - } - - /* Create the path to the file. */ - if (!LF_ISSET(WT_OPEN_FIXED)) - WT_ERR(__wt_filename(session, name, &path)); - - /* Call the underlying open function. */ - WT_ERR(conn->handle_open( - session, fh, path == NULL ? name : path, file_type, flags)); - open_called = true; - - /* - * Repeat the check for a match: if there's no match, link our newly - * created handle onto the database's list of files. - */ - if (__wt_handle_search(session, name, true, true, fh, fhp)) { -err: if (open_called) - WT_TRET(fh->fh_close(session, fh)); - if (fh != NULL) { - __wt_free(session, fh->name); - __wt_free(session, fh); - } - } - - __wt_free(session, path); - return (ret); -} - -/* - * __wt_close -- - * Close a file handle. - */ -int -__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh; - uint64_t bucket; - - conn = S2C(session); - - if (*fhp == NULL) - return (0); - fh = *fhp; - *fhp = NULL; - - /* Track handle-close as a file operation, so open and close match. 
*/ - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: handle-close", fh->name)); - - /* - * If the reference count hasn't gone to 0, or if it's an in-memory - * object, we're done. - * - * Assert the reference count is correct, but don't let it wrap. - */ - __wt_spin_lock(session, &conn->fh_lock); - WT_ASSERT(session, fh->ref > 0); - if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) { - __wt_spin_unlock(session, &conn->fh_lock); - return (0); - } - - /* Remove from the list. */ - bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; - WT_CONN_FILE_REMOVE(conn, fh, bucket); - (void)__wt_atomic_sub32(&conn->open_file_count, 1); - - __wt_spin_unlock(session, &conn->fh_lock); - - /* Discard underlying resources. */ - ret = fh->fh_close(session, fh); - - __wt_free(session, fh->name); - __wt_free(session, fh); - - return (ret); -} - -/* - * __wt_close_connection_close -- - * Close any open file handles at connection close. - */ -int -__wt_close_connection_close(WT_SESSION_IMPL *session) -{ - WT_DECL_RET; - WT_FH *fh; - WT_CONNECTION_IMPL *conn; - - conn = S2C(session); - - while ((fh = TAILQ_FIRST(&conn->fhqh)) != NULL) { - /* - * In-memory configurations will have open files, but the ref - * counts should be zero. - */ - if (!F_ISSET(conn, WT_CONN_IN_MEMORY) || fh->ref != 0) { - ret = EBUSY; - __wt_errx(session, - "Connection has open file handles: %s", fh->name); - } - - fh->ref = 1; - F_CLR(fh, WT_FH_IN_MEMORY); - - WT_TRET(__wt_close(session, &fh)); - } - return (ret); -} diff --git a/src/os_posix/os_posix.c b/src/os_posix/os_posix.c deleted file mode 100644 index 2d450b1df7f..00000000000 --- a/src/os_posix/os_posix.c +++ /dev/null @@ -1,729 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __posix_sync -- - * Underlying support function to flush a file handle. 
- */ -static int -__posix_sync(WT_SESSION_IMPL *session, - int fd, const char *name, const char *func, bool block) -{ - WT_DECL_RET; - -#ifdef HAVE_SYNC_FILE_RANGE - if (!block) { - WT_SYSCALL_RETRY(sync_file_range(fd, - (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func); - } -#else - /* - * Callers attempting asynchronous flush handle ENOTSUP returns, and - * won't make further attempts. - */ - if (!block) - return (ENOTSUP); -#endif - -#if defined(F_FULLFSYNC) - /* - * OS X fsync documentation: - * "Note that while fsync() will flush all data from the host to the - * drive (i.e. the "permanent storage device"), the drive itself may - * not physically write the data to the platters for quite some time - * and it may be written in an out-of-order sequence. For applications - * that require tighter guarantees about the integrity of their data, - * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks - * the drive to flush all buffered data to permanent storage." - * - * OS X F_FULLFSYNC fcntl documentation: - * "This is currently implemented on HFS, MS-DOS (FAT), and Universal - * Disk Format (UDF) file systems." - */ - WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); - if (ret == 0) - return (0); - /* - * Assume F_FULLFSYNC failed because the file system doesn't support it - * and fallback to fsync. - */ -#endif -#if defined(HAVE_FDATASYNC) - WT_SYSCALL_RETRY(fdatasync(fd), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: %s: fdatasync", name, func); -#else - WT_SYSCALL_RETRY(fsync(fd), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: %s: fsync", name, func); -#endif -} - -/* - * __posix_directory_sync -- - * Flush a directory to ensure file creation is durable. 
- */ -static int -__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ -#ifdef __linux__ - WT_DECL_RET; - int fd, tret; - const char *dir; - char *copy; - - /* - * POSIX 1003.1 does not require that fsync of a file handle ensures the - * entry in the directory containing the file has also reached disk (and - * there are historic Linux filesystems requiring this), do an explicit - * fsync on a file descriptor for the directory to be sure. - */ - copy = NULL; - if (path == NULL || (dir = strrchr(path, '/')) == NULL) - path = S2C(session)->home; - else { - /* - * Copy the directory name, leaving the trailing slash in place, - * so a path of "/foo" doesn't result in an empty string. - */ - WT_RET(__wt_strndup( - session, path, (size_t)(dir - path) + 1, &copy)); - path = copy; - } - - WT_SYSCALL_RETRY(( - (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_RET_MSG(session, ret, "%s: directory-sync: open", path); - - ret = __posix_sync(session, fd, path, "directory-sync", true); - - WT_SYSCALL_RETRY(close(fd), tret); - if (tret != 0) - __wt_err(session, tret, "%s: directory-sync: close", path); - return (ret == 0 ? tret : ret); -#else - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -#endif -} - -/* - * __posix_file_exist -- - * Return if the file exists. - */ -static int -__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) -{ - struct stat sb; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - name = path; - - WT_SYSCALL_RETRY(stat(name, &sb), ret); - if (ret == 0) - *existp = true; - else if (ret == ENOENT) { - *existp = false; - ret = 0; - } else - __wt_err(session, ret, "%s: file-exist: stat", name); - - __wt_free(session, path); - return (ret); -} - -/* - * __posix_file_remove -- - * Remove a file.
- */ -static int -__posix_file_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - char *path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, name, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-remove: file has open handles", name); -#endif - - WT_RET(__wt_filename(session, name, &path)); - name = path; - - WT_SYSCALL_RETRY(remove(name), ret); - if (ret != 0) - __wt_err(session, ret, "%s: file-remove: remove", name); - - __wt_free(session, path); - return (ret); -} - -/* - * __posix_file_rename -- - * Rename a file. - */ -static int -__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_DECL_RET; - char *from_path, *to_path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, from, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", from); - if (__wt_handle_search(session, to, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", to); -#endif - - from_path = to_path = NULL; - WT_ERR(__wt_filename(session, from, &from_path)); - from = from_path; - WT_ERR(__wt_filename(session, to, &to_path)); - to = to_path; - - WT_SYSCALL_RETRY(rename(from, to), ret); - if (ret != 0) - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); - -err: __wt_free(session, from_path); - __wt_free(session, to_path); - return (ret); -} - -/* - * __posix_file_size -- - * Get the size of a file in bytes, by file name. - */ -static int -__posix_file_size( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - name = path; - - /* - * Optionally don't log errors on ENOENT; some callers of this function - * expect failure in that case and don't want an error message logged. 
- */ - WT_SYSCALL_RETRY(stat(name, &sb), ret); - if (ret == 0) - *sizep = sb.st_size; - else if (ret != ENOENT || !silent) - __wt_err(session, ret, "%s: file-size: stat", name); - - __wt_free(session, path); - - return (ret); -} - -/* - * __posix_handle_advise -- - * POSIX fadvise. - */ -static int -__posix_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) -{ -#if defined(HAVE_POSIX_FADVISE) - WT_DECL_RET; - - /* - * Refuse pre-load when direct I/O is configured for the file, the - * kernel cache isn't interesting. - */ - if (advice == POSIX_MADV_WILLNEED && fh->direct_io) - return (ENOTSUP); - - WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); - if (ret == 0) - return (0); - - /* - * Treat EINVAL as not-supported, some systems don't support some flags. - * Quietly fail, callers expect not-supported failures. - */ - if (ret == EINVAL) - return (ENOTSUP); - - WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name); -#else - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - - /* Quietly fail, callers expect not-supported failures. */ - return (ENOTSUP); -#endif -} - -/* - * __posix_handle_close -- - * ANSI C close/fclose. - */ -static int -__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - int tret; - - if (fh->fp == NULL) { - WT_SYSCALL_RETRY(close(fh->fd), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name); - } - - /* If the handle was opened for writing, flush the file. */ - if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, ret, "%s: handle-close: fflush", fh->name); - } - - if ((tret = fclose(fh->fp)) != 0) { - tret = __wt_errno(); - __wt_err(session, tret, "%s: handle-close: fclose", fh->name); - } - return (ret == 0 ? tret : ret); -} - -/* - * __posix_handle_getc -- - * ANSI C fgetc. 
- */ -static int -__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, - ENOTSUP, "%s: handle-getc: no stream configured", fh->name); - - *chp = fgetc(fh->fp); - if (*chp != EOF || !ferror(fh->fp)) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); -} - -/* - * __posix_handle_lock -- - * Lock/unlock a file. - */ -static int -__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) -{ - struct flock fl; - WT_DECL_RET; - - /* - * WiredTiger requires this function be able to acquire locks past - * the end of file. - * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - */ - fl.l_start = 0; - fl.l_len = 1; - fl.l_type = lock ? F_WRLCK : F_UNLCK; - fl.l_whence = SEEK_SET; - - WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name); -} - -/* - * __posix_handle_printf -- - * ANSI C vfprintf. - */ -static int -__posix_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, ENOTSUP, - "%s: vfprintf: no stream configured", fh->name); - - if (vfprintf(fh->fp, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); -} - -/* - * __posix_handle_read -- - * POSIX pread. - */ -static int -__posix_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - size_t chunk; - ssize_t nr; - uint8_t *addr; - - /* Assert direct I/O is aligned and a multiple of the alignment. 
*/ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) - WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(), - "%s: handle-read: pread: failed to read %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __posix_handle_size -- - * Get the size of a file in bytes, by file handle. - */ -static int -__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - - WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); - if (ret == 0) { - *sizep = sb.st_size; - return (0); - } - WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name); -} - -/* - * __posix_handle_sync -- - * POSIX fflush/fsync. - */ -static int -__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) -{ - if (fh->fp == NULL) - return (__posix_sync( - session, fh->fd, fh->name, "handle-sync", block)); - - if (fflush(fh->fp) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); -} - -/* - * __posix_handle_truncate -- - * POSIX ftruncate. - */ -static int -__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - - WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name); -} - -/* - * __posix_handle_write -- - * POSIX pwrite. 
- */ -static int -__posix_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - size_t chunk; - ssize_t nw; - const uint8_t *addr; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) - WT_RET_MSG(session, __wt_errno(), - "%s: handle-write: pwrite: failed to write %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __posix_handle_open_cloexec -- - * Prevent child access to file handles. - */ -static inline int -__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) -{ -#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) - int f; - - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. There's an obvious race - * between the open and this call, prefer the flag to open if available. - */ - if ((f = fcntl(fd, F_GETFD)) == -1 || - fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) - WT_RET_MSG(session, __wt_errno(), - "%s: handle-open: fcntl", name); - return (0); -#else - WT_UNUSED(session); - WT_UNUSED(fd); - WT_UNUSED(name); - return (0); -#endif -} - -/* - * __posix_handle_open -- - * Open a file handle. 
- */ -static int -__posix_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - mode_t mode; - int f, fd, tret; - bool direct_io; - const char *stream_mode; - - conn = S2C(session); - direct_io = false; - - /* Set up error handling. */ - fh->fd = fd = -1; - fh->fp = NULL; - - if (file_type == WT_FILE_TYPE_DIRECTORY) { - f = O_RDONLY; -#ifdef O_CLOEXEC - /* - * Security: - * The application may spawn a new process, and we don't want - * another process to have access to our file handles. - */ - f |= O_CLOEXEC; -#endif - WT_SYSCALL_RETRY(( - (fd = open(name, f, 0444)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_ERR_MSG(session, ret, "%s: handle-open: open", name); - WT_ERR(__posix_handle_open_cloexec(session, fd, name)); - goto directory_open; - } - - f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR; - if (LF_ISSET(WT_OPEN_CREATE)) { - f |= O_CREAT; - if (LF_ISSET(WT_OPEN_EXCLUSIVE)) - f |= O_EXCL; - mode = 0666; - } else - mode = 0; - -#ifdef O_BINARY - /* Windows clones: we always want to treat the file as a binary. */ - f |= O_BINARY; -#endif -#ifdef O_CLOEXEC - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. - */ - f |= O_CLOEXEC; -#endif -#ifdef O_DIRECT - /* - * Direct I/O: file-type is a flag from the set of possible flags stored - * in the connection handle during configuration, check for a match. - * Also, "direct_io=checkpoint" configures direct I/O for readonly data - * files. - */ - if (FLD_ISSET(conn->direct_io, file_type) || - (LF_ISSET(WT_OPEN_READONLY) && - file_type == WT_FILE_TYPE_DATA && - FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { - f |= O_DIRECT; - direct_io = true; - } -#endif - fh->direct_io = direct_io; -#ifdef O_NOATIME - /* Avoid updating metadata for read-only workloads. 
*/ - if (file_type == WT_FILE_TYPE_DATA) - f |= O_NOATIME; -#endif - - if (file_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { -#ifdef O_DSYNC - f |= O_DSYNC; -#elif defined(O_SYNC) - f |= O_SYNC; -#else - WT_ERR_MSG(session, ENOTSUP, - "unsupported log sync mode configured"); -#endif - } - - WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_ERR_MSG(session, ret, - direct_io ? - "%s: handle-open: open: failed with direct I/O configured, " - "some filesystem types do not support direct I/O" : - "%s: handle-open: open", name); - WT_ERR(__posix_handle_open_cloexec(session, fd, name)); - - /* Disable read-ahead on trees: it slows down random read workloads. */ -#if defined(HAVE_POSIX_FADVISE) - if (file_type == WT_FILE_TYPE_DATA) { - WT_SYSCALL_RETRY( - posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); - if (ret != 0) - WT_ERR_MSG(session, ret, - "%s: handle-open: posix_fadvise", name); - } -#endif - - /* Optionally configure a stdio stream API. */ - switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { - case WT_STREAM_APPEND: - stream_mode = "a"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case WT_STREAM_READ: - stream_mode = "r"; - break; - case WT_STREAM_WRITE: - stream_mode = "w"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case 0: - default: - stream_mode = NULL; - break; - } - if (stream_mode != NULL) { - if ((fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: fdopen", name); - if (LF_ISSET(WT_STREAM_LINE_BUFFER)) - __wt_stream_set_line_buffer(fh->fp); - } - -directory_open: - fh->fd = fd; - - /* Configure fallocate calls. 
*/ - __wt_posix_handle_allocate_configure(session, fh); - - fh->fh_advise = __posix_handle_advise; - fh->fh_allocate = __wt_posix_handle_allocate; - fh->fh_close = __posix_handle_close; - fh->fh_getc = __posix_handle_getc; - fh->fh_lock = __posix_handle_lock; - fh->fh_map = __wt_posix_map; - fh->fh_map_discard = __wt_posix_map_discard; - fh->fh_map_preload = __wt_posix_map_preload; - fh->fh_map_unmap = __wt_posix_map_unmap; - fh->fh_printf = __posix_handle_printf; - fh->fh_read = __posix_handle_read; - fh->fh_size = __posix_handle_size; - fh->fh_sync = __posix_handle_sync; - fh->fh_truncate = __posix_handle_truncate; - fh->fh_write = __posix_handle_write; - - return (0); - -err: if (fd != -1) { - WT_SYSCALL_RETRY(close(fd), tret); - if (tret != 0) - __wt_err(session, tret, "%s: handle-open: close", name); - } - return (ret); -} - -/* - * __wt_os_posix -- - * Initialize a POSIX configuration. - */ -int -__wt_os_posix(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - - conn = S2C(session); - - /* Initialize the POSIX jump table. */ - conn->file_directory_list = __wt_posix_directory_list; - conn->file_directory_sync = __posix_directory_sync; - conn->file_exist = __posix_file_exist; - conn->file_remove = __posix_file_remove; - conn->file_rename = __posix_file_rename; - conn->file_size = __posix_file_size; - conn->handle_open = __posix_handle_open; - - return (0); -} - -/* - * __wt_os_posix_cleanup -- - * Discard a POSIX configuration. - */ -int -__wt_os_posix_cleanup(WT_SESSION_IMPL *session) -{ - WT_UNUSED(session); - - return (0); -} diff --git a/src/os_posix/os_setvbuf.c b/src/os_posix/os_setvbuf.c deleted file mode 100644 index d6107115eb3..00000000000 --- a/src/os_posix/os_setvbuf.c +++ /dev/null @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. 
- */ - -#include "wt_internal.h" - -/* - * __wt_stream_set_line_buffer -- - * Set line buffering on a stream. - */ -void -__wt_stream_set_line_buffer(FILE *fp) -{ - /* - * This function exists because MSVC doesn't support buffer sizes of 0 - * to the setvbuf call. To avoid re-introducing the bug, we have helper - * functions and disallow calling setvbuf directly in WiredTiger code. - */ - (void)setvbuf(fp, NULL, _IOLBF, 1024); -} - -/* - * __wt_stream_set_no_buffer -- - * Turn off buffering on a stream. - */ -void -__wt_stream_set_no_buffer(FILE *fp) -{ - (void)setvbuf(fp, NULL, _IONBF, 0); -} diff --git a/src/os_posix/os_stdio.c b/src/os_posix/os_stdio.c deleted file mode 100644 index 5e3cd522bd6..00000000000 --- a/src/os_posix/os_stdio.c +++ /dev/null @@ -1,236 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __stdio_handle_advise -- - * POSIX fadvise. - */ -static int -__stdio_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) -{ - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - WT_RET_MSG(session, ENOTSUP, "%s: handle-advise", fh->name); -} - -/* - * __stdio_handle_allocate -- - * POSIX fallocate. - */ -static int -__stdio_handle_allocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) -{ - WT_UNUSED(offset); - WT_UNUSED(len); - WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); -} - -/* - * __stdio_handle_close -- - * ANSI C close/fclose. - */ -static int -__stdio_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_RET_MSG(session, ENOTSUP, "%s: handle-close", fh->name); -} - -/* - * __stdio_handle_getc -- - * ANSI C fgetc. 
- */ -static int -__stdio_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) -{ - WT_UNUSED(chp); - WT_RET_MSG(session, ENOTSUP, "%s: handle-getc", fh->name); -} - -/* - * __stdio_handle_lock -- - * Lock/unlock a file. - */ -static int -__stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) -{ - WT_UNUSED(lock); - WT_RET_MSG(session, ENOTSUP, "%s: handle-lock", fh->name); -} - -/* - * __stdio_handle_map -- - * Map a file. - */ -static int -__stdio_handle_map(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t *lenp) -{ - WT_UNUSED(p); - WT_UNUSED(lenp); - WT_RET_MSG(session, ENOTSUP, "%s: handle-map", fh->name); -} - -/* - * __stdio_handle_map_discard -- - * Discard a section of a mapped region. - */ -static int -__stdio_handle_map_discard( - WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len) -{ - WT_UNUSED(p); - WT_UNUSED(len); - WT_RET_MSG(session, ENOTSUP, "%s: handle-map-discard", fh->name); -} - -/* - * __stdio_handle_map_preload -- - * Preload a section of a mapped region. - */ -static int -__stdio_handle_map_preload( - WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t len) -{ - WT_UNUSED(p); - WT_UNUSED(len); - WT_RET_MSG(session, ENOTSUP, "%s: handle-map-preload", fh->name); -} - -/* - * __stdio_handle_map_unmap -- - * Unmap a file. - */ -static int -__stdio_handle_map_unmap( - WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len) -{ - WT_UNUSED(p); - WT_UNUSED(len); - WT_RET_MSG(session, ENOTSUP, "%s: handle-map-unmap", fh->name); -} - -/* - * __stdio_handle_printf -- - * ANSI C vfprintf. - */ -static int -__stdio_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - if (vfprintf(fh->fp, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); -} - -/* - * __stdio_handle_read -- - * POSIX pread. 
- */ -static int -__stdio_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(buf); - WT_RET_MSG(session, ENOTSUP, "%s: handle-read", fh->name); -} - -/* - * __stdio_handle_size -- - * Get the size of a file in bytes, by file handle. - */ -static int -__stdio_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - WT_UNUSED(sizep); - WT_RET_MSG(session, ENOTSUP, "%s: handle-size", fh->name); -} - -/* - * __stdio_handle_sync -- - * POSIX fflush/fsync. - */ -static int -__stdio_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) -{ - WT_UNUSED(block); - - if (fflush(fh->fp) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); -} - -/* - * __stdio_handle_truncate -- - * POSIX ftruncate. - */ -static int -__stdio_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_UNUSED(len); - WT_RET_MSG(session, ENOTSUP, "%s: handle-truncate", fh->name); -} - -/* - * __stdio_handle_write -- - * POSIX pwrite. - */ -static int -__stdio_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(buf); - WT_RET_MSG(session, ENOTSUP, "%s: handle-write", fh->name); -} - -/* - * __stdio_func_init -- - * Initialize stdio functions. 
- */ -static void -__stdio_func_init(WT_FH *fh, const char *name, FILE *fp) -{ - fh->name = name; - fh->fp = fp; - - fh->fh_advise = __stdio_handle_advise; - fh->fh_allocate = __stdio_handle_allocate; - fh->fh_close = __stdio_handle_close; - fh->fh_getc = __stdio_handle_getc; - fh->fh_lock = __stdio_handle_lock; - fh->fh_map = __stdio_handle_map; - fh->fh_map_discard = __stdio_handle_map_discard; - fh->fh_map_preload = __stdio_handle_map_preload; - fh->fh_map_unmap = __stdio_handle_map_unmap; - fh->fh_printf = __stdio_handle_printf; - fh->fh_read = __stdio_handle_read; - fh->fh_size = __stdio_handle_size; - fh->fh_sync = __stdio_handle_sync; - fh->fh_truncate = __stdio_handle_truncate; - fh->fh_write = __stdio_handle_write; -} - -/* - * __wt_os_stdio -- - * Initialize the stdio configuration. - */ -int -__wt_os_stdio(WT_SESSION_IMPL *session) -{ - __stdio_func_init(WT_STDERR(session), "stderr", stderr); - __stdio_func_init(WT_STDOUT(session), "stdout", stdout); - - return (0); -} diff --git a/src/os_posix/os_strtouq.c b/src/os_posix/os_strtouq.c deleted file mode 100644 index 0ae604fc761..00000000000 --- a/src/os_posix/os_strtouq.c +++ /dev/null @@ -1,25 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_strtouq -- - * Convert a string to an unsigned quad integer. - */ -uint64_t -__wt_strtouq(const char *nptr, char **endptr, int base) -{ -#if defined(HAVE_STRTOUQ) - return (strtouq(nptr, endptr, base)); -#else - WT_STATIC_ASSERT(sizeof(uint64_t) == sizeof(unsigned long long)); - - return (strtoull(nptr, endptr, base)); -#endif -} diff --git a/src/os_win/os_fs_win.c b/src/os_win/os_fs_win.c new file mode 100644 index 00000000000..b82845771eb --- /dev/null +++ b/src/os_win/os_fs_win.c @@ -0,0 +1,676 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. 
+ * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __win_directory_sync -- + * Flush a directory to ensure a file creation is durable. + */ +static int +__win_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +} + +/* + * __win_file_exist -- + * Return if the file exists. + */ +static int +__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + + ret = GetFileAttributesA(path); + + __wt_free(session, path); + + if (ret != INVALID_FILE_ATTRIBUTES) + *existp = true; + else + *existp = false; + + return (0); +} + +/* + * __win_file_remove -- + * Remove a file. + */ +static int +__win_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + char *path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, name, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + if (DeleteFileA(name) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, "%s: file-remove: DeleteFileA", name); + } + + __wt_free(session, path); + return (ret); +} + +/* + * __win_file_rename -- + * Rename a file. 
+ */ +static int +__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + char *from_path, *to_path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, from, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); + if (__wt_handle_search(session, to, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + from = from_path; + WT_ERR(__wt_filename(session, to, &to_path)); + to = to_path; + + /* + * Check if file exists since Windows does not override the file if + * it exists. + */ + if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) + if (DeleteFileA(to) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + } + + if (ret == 0 && MoveFileA(from, to) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + } + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); +} + +/* + * __win_file_size -- + * Get the size of a file in bytes, by file name. + */ +static int +__win_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + WIN32_FILE_ATTRIBUTE_DATA data; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + + ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); + + __wt_free(session, path); + + if (ret != 0) { + *sizep = + ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; + return (0); + } + + /* + * Some callers of this function expect failure if the file doesn't + * exist, and don't want an error message logged. + */ + ret = __wt_win32_errno(); + if (!silent) + WT_RET_MSG(session, ret, + "%s: file-size: GetFileAttributesEx", name); + return (ret); +} + +/* + * __win_handle_advise -- + * MSVC fadvise. 
+ */ +static int +__win_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + + /* Quietly fail, callers expect not-supported failures. */ + return (ENOTSUP); +} + +/* + * __win_handle_allocate_configure -- + * Configure fallocate behavior for a file handle. + */ +static void +__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_UNUSED(session); + + /* + * fallocate on Windows would be implemented using SetEndOfFile, which + * can also truncate the file. WiredTiger expects fallocate to ignore + * requests to truncate the file which Windows does not do, so we don't + * support the call. + */ + fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; + fh->fallocate_requires_locking = false; +} + +/* + * __win_handle_allocate -- + * Allocate space for a file handle. + */ +static int +__win_handle_allocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + + WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); + return (ENOTSUP); +} + +/* + * __win_handle_close -- + * Close a file handle. + */ +static int +__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_DECL_RET; + + /* + * Note: For directories, we do not open valid directory handles on + * windows since it is not possible to sync a directory + */ + if (fh->filehandle != INVALID_HANDLE_VALUE && + CloseHandle(fh->filehandle) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-close: CloseHandle", fh->name); + } + + if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && + CloseHandle(fh->filehandle_secondary) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-close: secondary: CloseHandle", fh->name); + } + return (ret); +} + +/* + * __win_handle_getc -- + * ANSI C fgetc. 
+ */ +static int +__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, + ENOTSUP, "%s: handle-getc: no stream configured", fh->name); + + *chp = fgetc(fh->fp); + if (*chp != EOF || !ferror(fh->fp)) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); +} + +/* + * __win_handle_lock -- + * Lock/unlock a file. + */ +static int +__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + WT_DECL_RET; + + /* + * WiredTiger requires this function be able to acquire locks past + * the end of file. + * + * Note we're using fcntl(2) locking: all fcntl locks associated with a + * file for a given process are removed when any file descriptor for the + * file is closed by the process, even if a lock was never requested for + * that file descriptor. + * + * http://msdn.microsoft.com/ + * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx + * + * You can lock bytes that are beyond the end of the current file. + * This is useful to coordinate adding records to the end of a file. + */ + if (lock) { + if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-lock: LockFile", fh->name); + } + } else + if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-lock: UnlockFile", fh->name); + } + return (ret); +} + +/* + * __win_handle_printf -- + * ANSI C vfprintf. + */ +static int +__win_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, ENOTSUP, + "%s: vfprintf: no stream configured", fh->name); + + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); +} + +/* + * __win_handle_read -- + * Read a chunk. 
+ */ +static int +__win_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + DWORD chunk, nr; + uint8_t *addr; + OVERLAPPED overlapped = { 0 }; + + nr = 0; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); + overlapped.Offset = UINT32_MAX & offset; + overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); + + if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) + WT_RET_MSG(session, + nr == 0 ? WT_ERROR : __wt_win32_errno(), + "%s: handle-read: ReadFile: failed to read %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __win_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + LARGE_INTEGER size; + + if (GetFileSizeEx(fh->filehandle, &size) != 0) { + *sizep = size.QuadPart; + return (0); + } + + WT_RET_MSG(session, + __wt_win32_errno(), "%s: handle-size: GetFileSizeEx", fh->name); +} + +/* + * __win_handle_sync -- + * MSVC fflush/fsync. + */ +static int +__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + WT_DECL_RET; + + if (fh->fp == NULL) { + /* + * Callers attempting asynchronous flush handle ENOTSUP returns, + * and won't make further attempts. 
+ */ + if (!block) + return (ENOTSUP); + + if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) + WT_RET_MSG(session, __wt_win32_errno(), + "%s handle-sync: FlushFileBuffers error", fh->name); + return (0); + } + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); +} + +/* + * __win_handle_truncate -- + * Truncate a file. + */ +static int +__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + LARGE_INTEGER largeint; + + largeint.QuadPart = len; + + if (fh->filehandle_secondary == INVALID_HANDLE_VALUE) + WT_RET_MSG(session, EINVAL, + "%s: handle-truncate: read-only", fh->name); + + if (SetFilePointerEx( + fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) + WT_RET_MSG(session, __wt_win32_errno(), + "%s: handle-truncate: SetFilePointerEx", fh->name); + + if (SetEndOfFile(fh->filehandle_secondary) == FALSE) { + if (GetLastError() == ERROR_USER_MAPPED_FILE) + return (EBUSY); + WT_RET_MSG(session, __wt_win32_errno(), + "%s: handle-truncate: SetEndOfFile error", fh->name); + } + return (0); +} + +/* + * __win_handle_write -- + * Write a chunk. + */ +static int +__win_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + DWORD chunk; + DWORD nw; + const uint8_t *addr; + OVERLAPPED overlapped = { 0 }; + + nw = 0; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break writes larger than 1GB into 1GB chunks. 
*/ + for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { + chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); + overlapped.Offset = UINT32_MAX & offset; + overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); + + if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) + WT_RET_MSG(session, __wt_win32_errno(), + "%s: handle-write: WriteFile: failed to write %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __win_handle_open -- + * Open a file handle. + */ +static int +__win_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) +{ + DWORD dwCreationDisposition; + HANDLE filehandle, filehandle_secondary; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + int f, fd, share_mode; + bool direct_io; + const char *stream_mode; + + conn = S2C(session); + direct_io = false; + + /* Set up error handling. */ + fh->filehandle = fh->filehandle_secondary = + filehandle = filehandle_secondary = INVALID_HANDLE_VALUE; + fh->fp = NULL; + + /* + * Opening a file handle on a directory is only to support filesystems + * that require a directory sync for durability, and Windows doesn't + * require that functionality: create an empty WT_FH structure with + * invalid handles. + */ + if (file_type == WT_FILE_TYPE_DIRECTORY) + goto directory_open; + + share_mode = FILE_SHARE_READ; + if (!LF_ISSET(WT_OPEN_READONLY)) + share_mode |= FILE_SHARE_WRITE; + + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. 
+ * + * TODO: Set tighter file permissions but set bInheritHandle to false + * to prevent inheritance + */ + f = FILE_ATTRIBUTE_NORMAL; + + dwCreationDisposition = 0; + if (LF_ISSET(WT_OPEN_CREATE)) { + dwCreationDisposition = CREATE_NEW; + if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + dwCreationDisposition = CREATE_ALWAYS; + } else + dwCreationDisposition = OPEN_EXISTING; + + /* + * direct_io means no OS file caching. This requires aligned buffer + * allocations like O_DIRECT. + */ + if (FLD_ISSET(conn->direct_io, file_type) || + (LF_ISSET(WT_OPEN_READONLY) && + file_type == WT_FILE_TYPE_DATA && + FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { + f |= FILE_FLAG_NO_BUFFERING; + direct_io = true; + } + fh->direct_io = direct_io; + + /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ + if (FLD_ISSET(conn->write_through, file_type)) + f |= FILE_FLAG_WRITE_THROUGH; + + if (file_type == WT_FILE_TYPE_LOG && + FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) + f |= FILE_FLAG_WRITE_THROUGH; + + /* Disable read-ahead on trees: it slows down random read workloads. */ + if (file_type == WT_FILE_TYPE_DATA) + f |= FILE_FLAG_RANDOM_ACCESS; + + filehandle = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, + share_mode, NULL, dwCreationDisposition, f, NULL); + if (filehandle == INVALID_HANDLE_VALUE) { + if (LF_ISSET(WT_OPEN_CREATE) && + GetLastError() == ERROR_FILE_EXISTS) + filehandle = CreateFileA( + name, GENERIC_READ | GENERIC_WRITE, share_mode, + NULL, OPEN_EXISTING, f, NULL); + if (filehandle == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_win32_errno(), + direct_io ? + "%s: handle-open: CreateFileA: failed with direct " + "I/O configured, some filesystem types do not " + "support direct I/O" : + "%s: handle-open: CreateFileA", name); + } + + /* + * Open a second handle to file to support allocation/truncation + * concurrently with reads on the file. Writes would also move the file + * pointer. 
+ */ + if (!LF_ISSET(WT_OPEN_READONLY)) { + filehandle_secondary = CreateFileA(name, + GENERIC_READ | GENERIC_WRITE, + share_mode, NULL, OPEN_EXISTING, f, NULL); + if (filehandle_secondary == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_win32_errno(), + "%s: handle-open: CreateFileA: secondary", name); + } + + /* Optionally configure a stdio stream API. */ + switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { + case WT_STREAM_APPEND: + f = _O_APPEND | _O_TEXT; + stream_mode = "a"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case WT_STREAM_READ: + f = _O_RDONLY | _O_TEXT; + stream_mode = "r"; + break; + case WT_STREAM_WRITE: + f = _O_TEXT; + stream_mode = "w"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case 0: + default: + stream_mode = NULL; + break; + } + if (stream_mode != NULL) { + if ((fd = _open_osfhandle((intptr_t)filehandle, f)) == -1) + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: _open_osfhandle", name); + if ((fh->fp = fdopen(fd, stream_mode)) == NULL) + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fdopen", name); + if (LF_ISSET(WT_STREAM_LINE_BUFFER)) + __wt_stream_set_line_buffer(fh->fp); + } + + /* Configure fallocate/posix_fallocate calls. 
*/ + __win_handle_allocate_configure(session, fh); + +directory_open: + fh->filehandle = filehandle; + fh->filehandle_secondary = filehandle_secondary; + + fh->fh_advise = __win_handle_advise; + fh->fh_allocate = __win_handle_allocate; + fh->fh_close = __win_handle_close; + fh->fh_getc = __win_handle_getc; + fh->fh_lock = __win_handle_lock; + fh->fh_map = __wt_win_map; + fh->fh_map_discard = __wt_win_map_discard; + fh->fh_map_preload = __wt_win_map_preload; + fh->fh_map_unmap = __wt_win_map_unmap; + fh->fh_printf = __win_handle_printf; + fh->fh_read = __win_handle_read; + fh->fh_size = __win_handle_size; + fh->fh_sync = __win_handle_sync; + fh->fh_truncate = __win_handle_truncate; + fh->fh_write = __win_handle_write; + + return (0); + +err: if (filehandle != INVALID_HANDLE_VALUE) + (void)CloseHandle(filehandle); + if (filehandle_secondary != INVALID_HANDLE_VALUE) + (void)CloseHandle(filehandle_secondary); + + return (ret); +} + +/* + * __wt_os_win -- + * Initialize a MSVC configuration. + */ +int +__wt_os_win(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + /* Initialize the POSIX jump table. */ + conn->file_directory_list = __wt_win_directory_list; + conn->file_directory_sync = __win_directory_sync; + conn->file_exist = __win_file_exist; + conn->file_remove = __win_file_remove; + conn->file_rename = __win_file_rename; + conn->file_size = __win_file_size; + conn->handle_open = __win_handle_open; + + return (0); +} + +/* + * __wt_os_win_cleanup -- + * Discard a POSIX configuration. + */ +int +__wt_os_win_cleanup(WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (0); +} diff --git a/src/os_win/os_win.c b/src/os_win/os_win.c deleted file mode 100644 index b82845771eb..00000000000 --- a/src/os_win/os_win.c +++ /dev/null @@ -1,676 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. 
- */ - -#include "wt_internal.h" - -/* - * __win_directory_sync -- - * Flush a directory to ensure a file creation is durable. - */ -static int -__win_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -} - -/* - * __win_file_exist -- - * Return if the file exists. - */ -static int -__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) -{ - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - - ret = GetFileAttributesA(path); - - __wt_free(session, path); - - if (ret != INVALID_FILE_ATTRIBUTES) - *existp = true; - else - *existp = false; - - return (0); -} - -/* - * __win_file_remove -- - * Remove a file. - */ -static int -__win_file_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - char *path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, name, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-remove: file has open handles", name); -#endif - - WT_RET(__wt_filename(session, name, &path)); - name = path; - - if (DeleteFileA(name) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, "%s: file-remove: DeleteFileA", name); - } - - __wt_free(session, path); - return (ret); -} - -/* - * __win_file_rename -- - * Rename a file. 
- */ -static int -__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_DECL_RET; - char *from_path, *to_path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, from, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", from); - if (__wt_handle_search(session, to, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", to); -#endif - - from_path = to_path = NULL; - WT_ERR(__wt_filename(session, from, &from_path)); - from = from_path; - WT_ERR(__wt_filename(session, to, &to_path)); - to = to_path; - - /* - * Check if file exists since Windows does not override the file if - * it exists. - */ - if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) - if (DeleteFileA(to) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); - } - - if (ret == 0 && MoveFileA(from, to) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); - } - -err: __wt_free(session, from_path); - __wt_free(session, to_path); - return (ret); -} - -/* - * __win_file_size -- - * Get the size of a file in bytes, by file name. - */ -static int -__win_file_size( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) -{ - WIN32_FILE_ATTRIBUTE_DATA data; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - - ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); - - __wt_free(session, path); - - if (ret != 0) { - *sizep = - ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; - return (0); - } - - /* - * Some callers of this function expect failure if the file doesn't - * exist, and don't want an error message logged. - */ - ret = __wt_win32_errno(); - if (!silent) - WT_RET_MSG(session, ret, - "%s: file-size: GetFileAttributesEx", name); - return (ret); -} - -/* - * __win_handle_advise -- - * MSVC fadvise. 
- */ -static int -__win_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - - /* Quietly fail, callers expect not-supported failures. */ - return (ENOTSUP); -} - -/* - * __win_handle_allocate_configure -- - * Configure fallocate behavior for a file handle. - */ -static void -__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_UNUSED(session); - - /* - * fallocate on Windows would be implemented using SetEndOfFile, which - * can also truncate the file. WiredTiger expects fallocate to ignore - * requests to truncate the file which Windows does not do, so we don't - * support the call. - */ - fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; - fh->fallocate_requires_locking = false; -} - -/* - * __win_handle_allocate -- - * Allocate space for a file handle. - */ -static int -__win_handle_allocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - - WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); - return (ENOTSUP); -} - -/* - * __win_handle_close -- - * Close a file handle. - */ -static int -__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - - /* - * Note: For directories, we do not open valid directory handles on - * windows since it is not possible to sync a directory - */ - if (fh->filehandle != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle) == 0) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-close: CloseHandle", fh->name); - } - - if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle_secondary) == 0) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-close: secondary: CloseHandle", fh->name); - } - return (ret); -} - -/* - * __win_handle_getc -- - * ANSI C fgetc. 
- */ -static int -__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, - ENOTSUP, "%s: handle-getc: no stream configured", fh->name); - - *chp = fgetc(fh->fp); - if (*chp != EOF || !ferror(fh->fp)) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); -} - -/* - * __win_handle_lock -- - * Lock/unlock a file. - */ -static int -__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) -{ - WT_DECL_RET; - - /* - * WiredTiger requires this function be able to acquire locks past - * the end of file. - * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - * - * http://msdn.microsoft.com/ - * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx - * - * You can lock bytes that are beyond the end of the current file. - * This is useful to coordinate adding records to the end of a file. - */ - if (lock) { - if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-lock: LockFile", fh->name); - } - } else - if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-lock: UnlockFile", fh->name); - } - return (ret); -} - -/* - * __win_handle_printf -- - * ANSI C vfprintf. - */ -static int -__win_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, ENOTSUP, - "%s: vfprintf: no stream configured", fh->name); - - if (vfprintf(fh->fp, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); -} - -/* - * __win_handle_read -- - * Read a chunk. 
- */ -static int -__win_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - DWORD chunk, nr; - uint8_t *addr; - OVERLAPPED overlapped = { 0 }; - - nr = 0; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); - overlapped.Offset = UINT32_MAX & offset; - overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - - if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) - WT_RET_MSG(session, - nr == 0 ? WT_ERROR : __wt_win32_errno(), - "%s: handle-read: ReadFile: failed to read %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __win_handle_size -- - * Get the size of a file in bytes, by file handle. - */ -static int -__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - LARGE_INTEGER size; - - if (GetFileSizeEx(fh->filehandle, &size) != 0) { - *sizep = size.QuadPart; - return (0); - } - - WT_RET_MSG(session, - __wt_win32_errno(), "%s: handle-size: GetFileSizeEx", fh->name); -} - -/* - * __win_handle_sync -- - * MSVC fflush/fsync. - */ -static int -__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) -{ - WT_DECL_RET; - - if (fh->fp == NULL) { - /* - * Callers attempting asynchronous flush handle ENOTSUP returns, - * and won't make further attempts. 
- */ - if (!block) - return (ENOTSUP); - - if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) - WT_RET_MSG(session, __wt_win32_errno(), - "%s handle-sync: FlushFileBuffers error", fh->name); - return (0); - } - - if (fflush(fh->fp) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); -} - -/* - * __win_handle_truncate -- - * Truncate a file. - */ -static int -__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - LARGE_INTEGER largeint; - - largeint.QuadPart = len; - - if (fh->filehandle_secondary == INVALID_HANDLE_VALUE) - WT_RET_MSG(session, EINVAL, - "%s: handle-truncate: read-only", fh->name); - - if (SetFilePointerEx( - fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) - WT_RET_MSG(session, __wt_win32_errno(), - "%s: handle-truncate: SetFilePointerEx", fh->name); - - if (SetEndOfFile(fh->filehandle_secondary) == FALSE) { - if (GetLastError() == ERROR_USER_MAPPED_FILE) - return (EBUSY); - WT_RET_MSG(session, __wt_win32_errno(), - "%s: handle-truncate: SetEndOfFile error", fh->name); - } - return (0); -} - -/* - * __win_handle_write -- - * Write a chunk. - */ -static int -__win_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - DWORD chunk; - DWORD nw; - const uint8_t *addr; - OVERLAPPED overlapped = { 0 }; - - nw = 0; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. 
*/ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); - overlapped.Offset = UINT32_MAX & offset; - overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - - if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) - WT_RET_MSG(session, __wt_win32_errno(), - "%s: handle-write: WriteFile: failed to write %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __win_handle_open -- - * Open a file handle. - */ -static int -__win_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) -{ - DWORD dwCreationDisposition; - HANDLE filehandle, filehandle_secondary; - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - int f, fd, share_mode; - bool direct_io; - const char *stream_mode; - - conn = S2C(session); - direct_io = false; - - /* Set up error handling. */ - fh->filehandle = fh->filehandle_secondary = - filehandle = filehandle_secondary = INVALID_HANDLE_VALUE; - fh->fp = NULL; - - /* - * Opening a file handle on a directory is only to support filesystems - * that require a directory sync for durability, and Windows doesn't - * require that functionality: create an empty WT_FH structure with - * invalid handles. - */ - if (file_type == WT_FILE_TYPE_DIRECTORY) - goto directory_open; - - share_mode = FILE_SHARE_READ; - if (!LF_ISSET(WT_OPEN_READONLY)) - share_mode |= FILE_SHARE_WRITE; - - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. 
- * - * TODO: Set tighter file permissions but set bInheritHandle to false - * to prevent inheritance - */ - f = FILE_ATTRIBUTE_NORMAL; - - dwCreationDisposition = 0; - if (LF_ISSET(WT_OPEN_CREATE)) { - dwCreationDisposition = CREATE_NEW; - if (LF_ISSET(WT_OPEN_EXCLUSIVE)) - dwCreationDisposition = CREATE_ALWAYS; - } else - dwCreationDisposition = OPEN_EXISTING; - - /* - * direct_io means no OS file caching. This requires aligned buffer - * allocations like O_DIRECT. - */ - if (FLD_ISSET(conn->direct_io, file_type) || - (LF_ISSET(WT_OPEN_READONLY) && - file_type == WT_FILE_TYPE_DATA && - FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { - f |= FILE_FLAG_NO_BUFFERING; - direct_io = true; - } - fh->direct_io = direct_io; - - /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ - if (FLD_ISSET(conn->write_through, file_type)) - f |= FILE_FLAG_WRITE_THROUGH; - - if (file_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) - f |= FILE_FLAG_WRITE_THROUGH; - - /* Disable read-ahead on trees: it slows down random read workloads. */ - if (file_type == WT_FILE_TYPE_DATA) - f |= FILE_FLAG_RANDOM_ACCESS; - - filehandle = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, - share_mode, NULL, dwCreationDisposition, f, NULL); - if (filehandle == INVALID_HANDLE_VALUE) { - if (LF_ISSET(WT_OPEN_CREATE) && - GetLastError() == ERROR_FILE_EXISTS) - filehandle = CreateFileA( - name, GENERIC_READ | GENERIC_WRITE, share_mode, - NULL, OPEN_EXISTING, f, NULL); - if (filehandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_win32_errno(), - direct_io ? - "%s: handle-open: CreateFileA: failed with direct " - "I/O configured, some filesystem types do not " - "support direct I/O" : - "%s: handle-open: CreateFileA", name); - } - - /* - * Open a second handle to file to support allocation/truncation - * concurrently with reads on the file. Writes would also move the file - * pointer. 
- */ - if (!LF_ISSET(WT_OPEN_READONLY)) { - filehandle_secondary = CreateFileA(name, - GENERIC_READ | GENERIC_WRITE, - share_mode, NULL, OPEN_EXISTING, f, NULL); - if (filehandle_secondary == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_win32_errno(), - "%s: handle-open: CreateFileA: secondary", name); - } - - /* Optionally configure a stdio stream API. */ - switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { - case WT_STREAM_APPEND: - f = _O_APPEND | _O_TEXT; - stream_mode = "a"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case WT_STREAM_READ: - f = _O_RDONLY | _O_TEXT; - stream_mode = "r"; - break; - case WT_STREAM_WRITE: - f = _O_TEXT; - stream_mode = "w"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case 0: - default: - stream_mode = NULL; - break; - } - if (stream_mode != NULL) { - if ((fd = _open_osfhandle((intptr_t)filehandle, f)) == -1) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: _open_osfhandle", name); - if ((fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: fdopen", name); - if (LF_ISSET(WT_STREAM_LINE_BUFFER)) - __wt_stream_set_line_buffer(fh->fp); - } - - /* Configure fallocate/posix_fallocate calls. 
*/ - __win_handle_allocate_configure(session, fh); - -directory_open: - fh->filehandle = filehandle; - fh->filehandle_secondary = filehandle_secondary; - - fh->fh_advise = __win_handle_advise; - fh->fh_allocate = __win_handle_allocate; - fh->fh_close = __win_handle_close; - fh->fh_getc = __win_handle_getc; - fh->fh_lock = __win_handle_lock; - fh->fh_map = __wt_win_map; - fh->fh_map_discard = __wt_win_map_discard; - fh->fh_map_preload = __wt_win_map_preload; - fh->fh_map_unmap = __wt_win_map_unmap; - fh->fh_printf = __win_handle_printf; - fh->fh_read = __win_handle_read; - fh->fh_size = __win_handle_size; - fh->fh_sync = __win_handle_sync; - fh->fh_truncate = __win_handle_truncate; - fh->fh_write = __win_handle_write; - - return (0); - -err: if (filehandle != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle); - if (filehandle_secondary != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle_secondary); - - return (ret); -} - -/* - * __wt_os_win -- - * Initialize a MSVC configuration. - */ -int -__wt_os_win(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - - conn = S2C(session); - - /* Initialize the POSIX jump table. */ - conn->file_directory_list = __wt_win_directory_list; - conn->file_directory_sync = __win_directory_sync; - conn->file_exist = __win_file_exist; - conn->file_remove = __win_file_remove; - conn->file_rename = __win_file_rename; - conn->file_size = __win_file_size; - conn->handle_open = __win_handle_open; - - return (0); -} - -/* - * __wt_os_win_cleanup -- - * Discard a POSIX configuration. - */ -int -__wt_os_win_cleanup(WT_SESSION_IMPL *session) -{ - WT_UNUSED(session); - - return (0); -} diff --git a/src/support/filename.c b/src/support/filename.c deleted file mode 100644 index e8edceaa1e2..00000000000 --- a/src/support/filename.c +++ /dev/null @@ -1,192 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. 
- */ - -#include "wt_internal.h" - -/* - * __wt_filename -- - * Build a file name in a scratch buffer, automatically calculate the - * length of the file name. - */ -int -__wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) -{ - return (__wt_nfilename(session, name, strlen(name), path)); -} - -/* - * __wt_nfilename -- - * Build a file name in a scratch buffer. If the name is already an - * absolute path duplicate it, otherwise generate a path relative to the - * connection home directory. - * Needs to work with a NULL session handle - since this is called via - * the exists API which is used by the test utilities. - */ -int -__wt_nfilename( - WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) -{ - size_t len; - char *buf; - - *path = NULL; - - if (session == NULL || __wt_absolute_path(name)) - WT_RET(__wt_strndup(session, name, namelen, path)); - else { - len = strlen(S2C(session)->home) + 1 + namelen + 1; - WT_RET(__wt_calloc(session, 1, len, &buf)); - snprintf(buf, len, "%s%s%.*s", S2C(session)->home, - __wt_path_separator(), (int)namelen, name); - *path = buf; - } - - return (0); -} - -/* - * __wt_remove_if_exists -- - * Remove a file if it exists. - */ -int -__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name) -{ - bool exist; - - WT_RET(__wt_exist(session, name, &exist)); - if (exist) - WT_RET(__wt_remove(session, name)); - return (0); -} - -/* - * __wt_rename_and_sync_directory -- - * Rename a file and sync the enclosing directory. - */ -int -__wt_rename_and_sync_directory( - WT_SESSION_IMPL *session, const char *from, const char *to) -{ - const char *fp, *tp; - bool same_directory; - - /* Rename the source file to the target. */ - WT_RET(__wt_rename(session, from, to)); - - /* - * Flush the backing directory to guarantee the rename. 
My reading of - * POSIX 1003.1 is there's no guarantee flushing only one of the from - * or to directories, or flushing a common parent, is sufficient, and - * even if POSIX were to make that guarantee, existing filesystems are - * known to not provide the guarantee or only provide the guarantee - * with specific mount options. Flush both of the from/to directories - * until it's a performance problem. - */ - WT_RET(__wt_directory_sync(session, from)); - - /* - * In almost all cases, we're going to be renaming files in the same - * directory, we can at least fast-path that. - */ - fp = strrchr(from, '/'); - tp = strrchr(to, '/'); - same_directory = (fp == NULL && tp == NULL) || - (fp != NULL && tp != NULL && - fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0); - - return (same_directory ? 0 : __wt_directory_sync(session, to)); -} - -/* - * __wt_sync_handle_and_rename -- - * Sync and close a handle, and swap it into place. - */ -int -__wt_sync_handle_and_rename( - WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to) -{ - WT_DECL_RET; - WT_FH *fh; - - fh = *fhp; - *fhp = NULL; - - /* Flush to disk and close the handle. */ - ret = __wt_fsync(session, fh, true); - WT_TRET(__wt_close(session, &fh)); - WT_RET(ret); - - return (__wt_rename_and_sync_directory(session, from, to)); -} - -/* - * __wt_copy_and_sync -- - * Copy a file safely; here to support the wt utility. - */ -int -__wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) -{ - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_FH *ffh, *tfh; - WT_SESSION_IMPL *session; - size_t n; - wt_off_t offset, size; - char *buf; - - session = (WT_SESSION_IMPL *)wt_session; - ffh = tfh = NULL; - buf = NULL; - - /* - * Remove the target file if it exists, then create a temporary file, - * copy the original into it and rename it into place. 
I don't think - * its necessary to remove the file, or create a copy and do a rename, - * it's likely safe to overwrite the backup file directly. I'm doing - * the remove and rename to insulate us from errors in other programs - * that might not detect a corrupted backup file; it's cheap insurance - * in a path where undetected failure is very bad. - */ - WT_ERR(__wt_scr_alloc(session, 0, &tmp)); - WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to)); - - WT_ERR(__wt_remove_if_exists(session, to)); - WT_ERR(__wt_remove_if_exists(session, tmp->data)); - - /* Open the from and temporary file handles. */ - WT_ERR(__wt_open(session, from, - WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &ffh)); - WT_ERR(__wt_open(session, tmp->data, - WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh)); - - /* - * Allocate a copy buffer. Don't use a scratch buffer, this thing is - * big, and we don't want it hanging around. - */ -#define WT_BACKUP_COPY_SIZE (128 * 1024) - WT_ERR(__wt_malloc(session, WT_BACKUP_COPY_SIZE, &buf)); - - /* Get the file's size, then copy the bytes. */ - WT_ERR(__wt_filesize(session, ffh, &size)); - for (offset = 0; size > 0; size -= n, offset += n) { - n = (size_t)WT_MIN(size, WT_BACKUP_COPY_SIZE); - WT_ERR(__wt_read(session, ffh, offset, n, buf)); - WT_ERR(__wt_write(session, tfh, offset, n, buf)); - } - - /* Close the from handle, then swap the temporary file into place. */ - WT_ERR(__wt_close(session, &ffh)); - ret = __wt_sync_handle_and_rename(session, &tfh, tmp->data, to); - -err: WT_TRET(__wt_close(session, &ffh)); - WT_TRET(__wt_close(session, &tfh)); - - __wt_free(session, buf); - __wt_scr_free(session, &tmp); - return (ret); -} -- cgit v1.2.1 From 1137af3c5893473ee63df00619049dad3745561d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 09:41:35 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Whitespace, re-position a comment for clarity. 
--- src/os_common/filename.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/os_common/filename.c b/src/os_common/filename.c index e8edceaa1e2..83a1a985378 100644 --- a/src/os_common/filename.c +++ b/src/os_common/filename.c @@ -24,8 +24,6 @@ __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) * Build a file name in a scratch buffer. If the name is already an * absolute path duplicate it, otherwise generate a path relative to the * connection home directory. - * Needs to work with a NULL session handle - since this is called via - * the exists API which is used by the test utilities. */ int __wt_nfilename( @@ -36,6 +34,10 @@ __wt_nfilename( *path = NULL; + /* + * Needs to work with a NULL session handle - since this is called via + * the exists API which is used by the test utilities. + */ if (session == NULL || __wt_absolute_path(name)) WT_RET(__wt_strndup(session, name, namelen, path)); else { -- cgit v1.2.1 From fcc6d047fa36d302be5cb4d24c43083829a8fa6f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 10:07:24 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files MSVC doesn't support line-buffering, it's the same as full-buffering. Turn on no-buffering instead. 
--- build_win/filelist.win | 2 +- dist/filelist | 2 +- dist/s_style | 2 +- dist/s_win | 4 +++- src/os_common/os_setvbuf.c | 34 ---------------------------------- src/os_posix/os_setvbuf.c | 34 ++++++++++++++++++++++++++++++++++ src/os_win/os_setvbuf.c | 38 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 78 insertions(+), 38 deletions(-) delete mode 100644 src/os_common/os_setvbuf.c create mode 100644 src/os_posix/os_setvbuf.c create mode 100644 src/os_win/os_setvbuf.c diff --git a/build_win/filelist.win b/build_win/filelist.win index 323e45d0305..b530fb7f1bc 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -110,7 +110,6 @@ src/os_common/os_fs_stdio.c src/os_common/os_getline.c src/os_common/os_getopt.c src/os_common/os_init.c -src/os_common/os_setvbuf.c src/os_common/os_strtouq.c src/os_win/os_dir.c src/os_win/os_dlopen.c @@ -123,6 +122,7 @@ src/os_win/os_once.c src/os_win/os_pagesize.c src/os_win/os_path.c src/os_win/os_priv.c +src/os_win/os_setvbuf.c src/os_win/os_sleep.c src/os_win/os_snprintf.c src/os_win/os_thread.c diff --git a/dist/filelist b/dist/filelist index 2b229e5fa91..4b2ca809df7 100644 --- a/dist/filelist +++ b/dist/filelist @@ -110,7 +110,6 @@ src/os_common/os_fs_stdio.c src/os_common/os_getline.c src/os_common/os_getopt.c src/os_common/os_init.c -src/os_common/os_setvbuf.c src/os_common/os_strtouq.c src/os_posix/os_dir.c src/os_posix/os_dlopen.c @@ -124,6 +123,7 @@ src/os_posix/os_once.c src/os_posix/os_pagesize.c src/os_posix/os_path.c src/os_posix/os_priv.c +src/os_posix/os_setvbuf.c src/os_posix/os_sleep.c src/os_posix/os_thread.c src/os_posix/os_time.c diff --git a/dist/s_style b/dist/s_style index 1222318e1ad..a163eb83b25 100755 --- a/dist/s_style +++ b/dist/s_style @@ -84,7 +84,7 @@ else cat $t } - if ! expr "$f" : 'src/os_common/os_setvbuf.c' > /dev/null && + if ! 
expr "$f" : 'src/.*/os_setvbuf.c' > /dev/null && egrep -w 'setvbuf' $f > $t; then echo "$f: setvbuf call, use WiredTiger library replacements" cat $t diff --git a/dist/s_win b/dist/s_win index 24390b0a120..ff8e1638f69 100755 --- a/dist/s_win +++ b/dist/s_win @@ -49,6 +49,7 @@ win_filelist() -e '/\/os_posix\//d' \ -e '/src\/support\/power8\/crc32.S/d' \ -e '/src\/support\/power8\/crc32_wrapper.c/d' + echo 'src/os_win/os_dir.c' echo 'src/os_win/os_dlopen.c' echo 'src/os_win/os_errno.c' @@ -60,6 +61,7 @@ win_filelist() echo 'src/os_win/os_pagesize.c' echo 'src/os_win/os_path.c' echo 'src/os_win/os_priv.c' + echo 'src/os_win/os_setvbuf.c' echo 'src/os_win/os_sleep.c' echo 'src/os_win/os_snprintf.c' echo 'src/os_win/os_thread.c' @@ -67,7 +69,7 @@ win_filelist() echo 'src/os_win/os_vsnprintf.c' echo 'src/os_win/os_yield.c') < filelist | sort > $t - cmp $t $f > /dev/null 2>&1 || + cmp $t $f > /dev/null 2>&1 || (echo "Building $f" && rm -f $f && cp $t $f) } diff --git a/src/os_common/os_setvbuf.c b/src/os_common/os_setvbuf.c deleted file mode 100644 index d6107115eb3..00000000000 --- a/src/os_common/os_setvbuf.c +++ /dev/null @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_stream_set_line_buffer -- - * Set line buffering on a stream. - */ -void -__wt_stream_set_line_buffer(FILE *fp) -{ - /* - * This function exists because MSVC doesn't support buffer sizes of 0 - * to the setvbuf call. To avoid re-introducing the bug, we have helper - * functions and disallow calling setvbuf directly in WiredTiger code. - */ - (void)setvbuf(fp, NULL, _IOLBF, 1024); -} - -/* - * __wt_stream_set_no_buffer -- - * Turn off buffering on a stream. 
- */ -void -__wt_stream_set_no_buffer(FILE *fp) -{ - (void)setvbuf(fp, NULL, _IONBF, 0); -} diff --git a/src/os_posix/os_setvbuf.c b/src/os_posix/os_setvbuf.c new file mode 100644 index 00000000000..d6107115eb3 --- /dev/null +++ b/src/os_posix/os_setvbuf.c @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_stream_set_line_buffer -- + * Set line buffering on a stream. + */ +void +__wt_stream_set_line_buffer(FILE *fp) +{ + /* + * This function exists because MSVC doesn't support buffer sizes of 0 + * to the setvbuf call. To avoid re-introducing the bug, we have helper + * functions and disallow calling setvbuf directly in WiredTiger code. + */ + (void)setvbuf(fp, NULL, _IOLBF, 1024); +} + +/* + * __wt_stream_set_no_buffer -- + * Turn off buffering on a stream. + */ +void +__wt_stream_set_no_buffer(FILE *fp) +{ + (void)setvbuf(fp, NULL, _IONBF, 0); +} diff --git a/src/os_win/os_setvbuf.c b/src/os_win/os_setvbuf.c new file mode 100644 index 00000000000..3eef07915c1 --- /dev/null +++ b/src/os_win/os_setvbuf.c @@ -0,0 +1,38 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_stream_set_line_buffer -- + * Set line buffering on a stream. + */ +void +__wt_stream_set_line_buffer(FILE *fp) +{ + /* + * This function exists because MSVC doesn't support buffer sizes of 0 + * to the setvbuf call. To avoid re-introducing the bug, we have helper + * functions and disallow calling setvbuf directly in WiredTiger code. + * + * Additionally, MSVC doesn't support line buffering, the result is the + * same as full-buffering. We assume our caller wants immediate output, + * set no-buffering instead. 
+ */ + return (__wt_stream_set_no_buffer(fp)); +} + +/* + * __wt_stream_set_no_buffer -- + * Turn off buffering on a stream. + */ +void +__wt_stream_set_no_buffer(FILE *fp) +{ + (void)setvbuf(fp, NULL, _IONBF, 0); +} -- cgit v1.2.1 From 5039556f7853264e9b11422f7e52b208562e9e32 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 12:28:56 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Now the mmap functions are methods off the WT_FH handle, their verbose messages should be HANDLEOPS, not FILEOPS. --- src/os_posix/os_map.c | 4 ++-- src/os_win/os_map.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 8d8168362e2..304bb32df31 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -37,7 +37,7 @@ __wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) WT_RET(__wt_filesize(session, fh, &file_size)); len = (size_t)file_size; - (void)__wt_verbose(session, WT_VERB_FILEOPS, + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len); if ((map = mmap(NULL, len, @@ -177,7 +177,7 @@ __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - (void)__wt_verbose(session, WT_VERB_FILEOPS, + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); if (munmap(map, len) == 0) diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index 5156310945c..feee2899937 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -28,7 +28,7 @@ __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) WT_RET(__wt_filesize(session, fh, &file_size)); len = (size_t)file_size; - (void)__wt_verbose(session, WT_VERB_FILEOPS, + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len); 
fh->maphandle = @@ -93,7 +93,7 @@ __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) { WT_DECL_RET; - (void)__wt_verbose(session, WT_VERB_FILEOPS, + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); if (UnmapViewOfFile(map) == 0) { -- cgit v1.2.1 From 4fc247a4c3c81eb992dda02b88baffcfd74ef266 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 12:34:08 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Windows was leaking handles: if we open a stream with stdio, close it with fclose; my reading of the MSVC documentation is fclose will close the underlying handles returned by _open_osfhandle and CreateFileA. --- src/os_posix/os_fs_posix.c | 12 ++++++------ src/os_win/os_fs_win.c | 37 ++++++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/os_posix/os_fs_posix.c b/src/os_posix/os_fs_posix.c index 2d450b1df7f..2d0bee7ae05 100644 --- a/src/os_posix/os_fs_posix.c +++ b/src/os_posix/os_fs_posix.c @@ -290,7 +290,6 @@ static int __posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) { WT_DECL_RET; - int tret; if (fh->fp == NULL) { WT_SYSCALL_RETRY(close(fh->fd), ret); @@ -299,17 +298,18 @@ __posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name); } - /* If the handle was opened for writing, flush the file. */ + /* If the stream was opened for writing, flush the file. */ if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { ret = __wt_errno(); __wt_err(session, ret, "%s: handle-close: fflush", fh->name); } - if ((tret = fclose(fh->fp)) != 0) { - tret = __wt_errno(); - __wt_err(session, tret, "%s: handle-close: fclose", fh->name); + /* Close the file. */ + if (fclose(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, ret, "%s: handle-close: fclose", fh->name); } - return (ret == 0 ? 
tret : ret); + return (ret); } /* diff --git a/src/os_win/os_fs_win.c b/src/os_win/os_fs_win.c index b82845771eb..e7f67045572 100644 --- a/src/os_win/os_fs_win.c +++ b/src/os_win/os_fs_win.c @@ -217,17 +217,36 @@ __win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) { WT_DECL_RET; - /* - * Note: For directories, we do not open valid directory handles on - * windows since it is not possible to sync a directory - */ - if (fh->filehandle != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle) == 0) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-close: CloseHandle", fh->name); + if (fh->fp == NULL) { + /* + * We don't open Windows system handles when opening directories + * for flushing, since it is not necessary (or possible) to flush + * a directory on Windows. Confirm the file handle is set before + * attempting to close it. + */ + if (fh->filehandle != INVALID_HANDLE_VALUE && + CloseHandle(fh->filehandle) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-close: CloseHandle", fh->name); + } + } else { + /* If the stream was opened for writing, flush the file. */ + if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, + ret, "%s: handle-close: fflush", fh->name); + } + + /* Close the file, closing all the underlying handles. */ + if (fclose(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, + ret, "%s: handle-close: fclose", fh->name); + } } + /* Close the secondary handle. */ if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && CloseHandle(fh->filehandle_secondary) == 0) { ret = __wt_win32_errno(); -- cgit v1.2.1 From 6ee271cf24d1f2264fb6d962f15d156424a66075 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 12:39:51 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Remove the "posix" and "win" parts of the os_posix and os_win FS code file names, it's no longer needed. KNF fix for a long line. 
--- build_win/filelist.win | 2 +- dist/filelist | 2 +- dist/s_win | 2 +- src/os_posix/os_fs.c | 729 +++++++++++++++++++++++++++++++++++++++++++++ src/os_posix/os_fs_posix.c | 729 --------------------------------------------- src/os_win/os_fs.c | 695 ++++++++++++++++++++++++++++++++++++++++++ src/os_win/os_fs_win.c | 695 ------------------------------------------ 7 files changed, 1427 insertions(+), 1427 deletions(-) create mode 100644 src/os_posix/os_fs.c delete mode 100644 src/os_posix/os_fs_posix.c create mode 100644 src/os_win/os_fs.c delete mode 100644 src/os_win/os_fs_win.c diff --git a/build_win/filelist.win b/build_win/filelist.win index b530fb7f1bc..c370303d5f8 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -114,7 +114,7 @@ src/os_common/os_strtouq.c src/os_win/os_dir.c src/os_win/os_dlopen.c src/os_win/os_errno.c -src/os_win/os_fs_win.c +src/os_win/os_fs.c src/os_win/os_getenv.c src/os_win/os_map.c src/os_win/os_mtx_cond.c diff --git a/dist/filelist b/dist/filelist index 4b2ca809df7..1d7ffa76922 100644 --- a/dist/filelist +++ b/dist/filelist @@ -115,7 +115,7 @@ src/os_posix/os_dir.c src/os_posix/os_dlopen.c src/os_posix/os_errno.c src/os_posix/os_fallocate.c -src/os_posix/os_fs_posix.c +src/os_posix/os_fs.c src/os_posix/os_getenv.c src/os_posix/os_map.c src/os_posix/os_mtx_cond.c diff --git a/dist/s_win b/dist/s_win index ff8e1638f69..562e89f94c6 100755 --- a/dist/s_win +++ b/dist/s_win @@ -53,7 +53,7 @@ win_filelist() echo 'src/os_win/os_dir.c' echo 'src/os_win/os_dlopen.c' echo 'src/os_win/os_errno.c' - echo 'src/os_win/os_fs_win.c' + echo 'src/os_win/os_fs.c' echo 'src/os_win/os_getenv.c' echo 'src/os_win/os_map.c' echo 'src/os_win/os_mtx_cond.c' diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c new file mode 100644 index 00000000000..2d0bee7ae05 --- /dev/null +++ b/src/os_posix/os_fs.c @@ -0,0 +1,729 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. 
+ * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __posix_sync -- + * Underlying support function to flush a file handle. + */ +static int +__posix_sync(WT_SESSION_IMPL *session, + int fd, const char *name, const char *func, bool block) +{ + WT_DECL_RET; + +#ifdef HAVE_SYNC_FILE_RANGE + if (!block) { + WT_SYSCALL_RETRY(sync_file_range(fd, + (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func); + } +#else + /* + * Callers attempting asynchronous flush handle ENOTSUP returns, and + * won't make further attempts. + */ + if (!block) + return (ENOTSUP); +#endif + +#if defined(F_FULLFSYNC) + /* + * OS X fsync documentation: + * "Note that while fsync() will flush all data from the host to the + * drive (i.e. the "permanent storage device"), the drive itself may + * not physically write the data to the platters for quite some time + * and it may be written in an out-of-order sequence. For applications + * that require tighter guarantees about the integrity of their data, + * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks + * the drive to flush all buffered data to permanent storage." + * + * OS X F_FULLFSYNC fcntl documentation: + * "This is currently implemented on HFS, MS-DOS (FAT), and Universal + * Disk Format (UDF) file systems." + */ + WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); + if (ret == 0) + return (0); + /* + * Assume F_FULLFSYNC failed because the file system doesn't support it + * and fallback to fsync. 
+ */ +#endif +#if defined(HAVE_FDATASYNC) + WT_SYSCALL_RETRY(fdatasync(fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: fdatasync", name, func); +#else + WT_SYSCALL_RETRY(fsync(fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: fsync", name, func); +#endif +} + +/* + * __posix_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static int +__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ +#ifdef __linux__ + WT_DECL_RET; + int fd, tret; + const char *dir; + char *copy; + + /* + * POSIX 1003.1 does not require that fsync of a file handle ensures the + * entry in the directory containing the file has also reached disk (and + * there are historic Linux filesystems requiring this), do an explicit + * fsync on a file descriptor for the directory to be sure. + */ + copy = NULL; + if (path == NULL || (dir = strrchr(path, '/')) == NULL) + path = S2C(session)->home; + else { + /* + * Copy the directory name, leaving the trailing slash in place, + * so a path of "/foo" doesn't result in an empty string. + */ + WT_RET(__wt_strndup( + session, path, (size_t)(dir - path) + 1, &copy)); + path = copy; + } + + WT_SYSCALL_RETRY(( + (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); + if (ret != 0) + WT_RET_MSG(session, ret, "%s: directory-sync: open", path); + + ret = __posix_sync(session, fd, path, "directory-sync", true); + + WT_SYSCALL_RETRY(close(fd), tret); + if (tret != 0) + __wt_err(session, tret, "%s: directory-sync: close", path); + return (ret == 0 ? tret : ret); +#else + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +#endif +} + +/* + * __posix_file_exist -- + * Return if the file exists.
+ */ +static int +__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + struct stat sb; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + WT_SYSCALL_RETRY(stat(name, &sb), ret); + if (ret == 0) + *existp = true; + else if (ret == ENOENT) { + *existp = false; + ret = 0; + } else + __wt_err(session, ret, "%s: file-exist: stat", name); + + __wt_free(session, path); + return (ret); +} + +/* + * __posix_file_remove -- + * Remove a file. + */ +static int +__posix_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + char *path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, name, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + WT_SYSCALL_RETRY(remove(name), ret); + if (ret != 0) + __wt_err(session, ret, "%s: file-remove: remove", name); + + __wt_free(session, path); + return (ret); +} + +/* + * __posix_file_rename -- + * Rename a file. 
+ */ +static int +__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + char *from_path, *to_path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, from, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); + if (__wt_handle_search(session, to, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + from = from_path; + WT_ERR(__wt_filename(session, to, &to_path)); + to = to_path; + + WT_SYSCALL_RETRY(rename(from, to), ret); + if (ret != 0) + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); +} + +/* + * __posix_file_size -- + * Get the size of a file in bytes, by file name. + */ +static int +__posix_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + /* + * Optionally don't log errors on ENOENT; some callers of this function + * expect failure in that case and don't want an error message logged. + */ + WT_SYSCALL_RETRY(stat(name, &sb), ret); + if (ret == 0) + *sizep = sb.st_size; + else if (ret != ENOENT || !silent) + __wt_err(session, ret, "%s: file-size: stat", name); + + __wt_free(session, path); + + return (ret); +} + +/* + * __posix_handle_advise -- + * POSIX fadvise. + */ +static int +__posix_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ +#if defined(HAVE_POSIX_FADVISE) + WT_DECL_RET; + + /* + * Refuse pre-load when direct I/O is configured for the file, the + * kernel cache isn't interesting. 
+ */ + if (advice == POSIX_MADV_WILLNEED && fh->direct_io) + return (ENOTSUP); + + WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); + if (ret == 0) + return (0); + + /* + * Treat EINVAL as not-supported, some systems don't support some flags. + * Quietly fail, callers expect not-supported failures. + */ + if (ret == EINVAL) + return (ENOTSUP); + + WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name); +#else + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + + /* Quietly fail, callers expect not-supported failures. */ + return (ENOTSUP); +#endif +} + +/* + * __posix_handle_close -- + * ANSI C close/fclose. + */ +static int +__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_DECL_RET; + + if (fh->fp == NULL) { + WT_SYSCALL_RETRY(close(fh->fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name); + } + + /* If the stream was opened for writing, flush the file. */ + if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, ret, "%s: handle-close: fflush", fh->name); + } + + /* Close the file. */ + if (fclose(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, ret, "%s: handle-close: fclose", fh->name); + } + return (ret); +} + +/* + * __posix_handle_getc -- + * ANSI C fgetc. + */ +static int +__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, + ENOTSUP, "%s: handle-getc: no stream configured", fh->name); + + *chp = fgetc(fh->fp); + if (*chp != EOF || !ferror(fh->fp)) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); +} + +/* + * __posix_handle_lock -- + * Lock/unlock a file. 
+ */ +static int +__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + struct flock fl; + WT_DECL_RET; + + /* + * WiredTiger requires this function be able to acquire locks past + * the end of file. + * + * Note we're using fcntl(2) locking: all fcntl locks associated with a + * file for a given process are removed when any file descriptor for the + * file is closed by the process, even if a lock was never requested for + * that file descriptor. + */ + fl.l_start = 0; + fl.l_len = 1; + fl.l_type = lock ? F_WRLCK : F_UNLCK; + fl.l_whence = SEEK_SET; + + WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name); +} + +/* + * __posix_handle_printf -- + * ANSI C vfprintf. + */ +static int +__posix_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, ENOTSUP, + "%s: vfprintf: no stream configured", fh->name); + + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); +} + +/* + * __posix_handle_read -- + * POSIX pread. + */ +static int +__posix_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + size_t chunk; + ssize_t nr; + uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) + WT_RET_MSG(session, nr == 0 ? 
WT_ERROR : __wt_errno(), + "%s: handle-read: pread: failed to read %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __posix_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + + WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); + if (ret == 0) { + *sizep = sb.st_size; + return (0); + } + WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name); +} + +/* + * __posix_handle_sync -- + * POSIX fflush/fsync. + */ +static int +__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + if (fh->fp == NULL) + return (__posix_sync( + session, fh->fd, fh->name, "handle-sync", block)); + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); +} + +/* + * __posix_handle_truncate -- + * POSIX ftruncate. + */ +static int +__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + + WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name); +} + +/* + * __posix_handle_write -- + * POSIX pwrite. + */ +static int +__posix_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + size_t chunk; + ssize_t nw; + const uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break writes larger than 1GB into 1GB chunks. 
*/ + for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) + WT_RET_MSG(session, __wt_errno(), + "%s: handle-write: pwrite: failed to write %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __posix_handle_open_cloexec -- + * Prevent child access to file handles. + */ +static inline int +__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) +{ +#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) + int f; + + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. There's an obvious race + * between the open and this call, prefer the flag to open if available. + */ + if ((f = fcntl(fd, F_GETFD)) == -1 || + fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) + WT_RET_MSG(session, __wt_errno(), + "%s: handle-open: fcntl", name); + return (0); +#else + WT_UNUSED(session); + WT_UNUSED(fd); + WT_UNUSED(name); + return (0); +#endif +} + +/* + * __posix_handle_open -- + * Open a file handle. + */ +static int +__posix_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + mode_t mode; + int f, fd, tret; + bool direct_io; + const char *stream_mode; + + conn = S2C(session); + direct_io = false; + + /* Set up error handling. */ + fh->fd = fd = -1; + fh->fp = NULL; + + if (file_type == WT_FILE_TYPE_DIRECTORY) { + f = O_RDONLY; +#ifdef O_CLOEXEC + /* + * Security: + * The application may spawn a new process, and we don't want + * another process to have access to our file handles. + */ + f |= O_CLOEXEC; +#endif + WT_SYSCALL_RETRY(( + (fd = open(name, f, 0444)) == -1 ? 
1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, "%s: handle-open: open", name); + WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + goto directory_open; + } + + f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR; + if (LF_ISSET(WT_OPEN_CREATE)) { + f |= O_CREAT; + if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + f |= O_EXCL; + mode = 0666; + } else + mode = 0; + +#ifdef O_BINARY + /* Windows clones: we always want to treat the file as a binary. */ + f |= O_BINARY; +#endif +#ifdef O_CLOEXEC + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. + */ + f |= O_CLOEXEC; +#endif +#ifdef O_DIRECT + /* + * Direct I/O: file-type is a flag from the set of possible flags stored + * in the connection handle during configuration, check for a match. + * Also, "direct_io=checkpoint" configures direct I/O for readonly data + * files. + */ + if (FLD_ISSET(conn->direct_io, file_type) || + (LF_ISSET(WT_OPEN_READONLY) && + file_type == WT_FILE_TYPE_DATA && + FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { + f |= O_DIRECT; + direct_io = true; + } +#endif + fh->direct_io = direct_io; +#ifdef O_NOATIME + /* Avoid updating metadata for read-only workloads. */ + if (file_type == WT_FILE_TYPE_DATA) + f |= O_NOATIME; +#endif + + if (file_type == WT_FILE_TYPE_LOG && + FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { +#ifdef O_DSYNC + f |= O_DSYNC; +#elif defined(O_SYNC) + f |= O_SYNC; +#else + WT_ERR_MSG(session, ENOTSUP, + "unsupported log sync mode configured"); +#endif + } + + WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, + direct_io ? + "%s: handle-open: open: failed with direct I/O configured, " + "some filesystem types do not support direct I/O" : + "%s: handle-open: open", name); + WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + + /* Disable read-ahead on trees: it slows down random read workloads. 
*/ +#if defined(HAVE_POSIX_FADVISE) + if (file_type == WT_FILE_TYPE_DATA) { + WT_SYSCALL_RETRY( + posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, + "%s: handle-open: posix_fadvise", name); + } +#endif + + /* Optionally configure a stdio stream API. */ + switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { + case WT_STREAM_APPEND: + stream_mode = "a"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case WT_STREAM_READ: + stream_mode = "r"; + break; + case WT_STREAM_WRITE: + stream_mode = "w"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case 0: + default: + stream_mode = NULL; + break; + } + if (stream_mode != NULL) { + if ((fh->fp = fdopen(fd, stream_mode)) == NULL) + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fdopen", name); + if (LF_ISSET(WT_STREAM_LINE_BUFFER)) + __wt_stream_set_line_buffer(fh->fp); + } + +directory_open: + fh->fd = fd; + + /* Configure fallocate calls. */ + __wt_posix_handle_allocate_configure(session, fh); + + fh->fh_advise = __posix_handle_advise; + fh->fh_allocate = __wt_posix_handle_allocate; + fh->fh_close = __posix_handle_close; + fh->fh_getc = __posix_handle_getc; + fh->fh_lock = __posix_handle_lock; + fh->fh_map = __wt_posix_map; + fh->fh_map_discard = __wt_posix_map_discard; + fh->fh_map_preload = __wt_posix_map_preload; + fh->fh_map_unmap = __wt_posix_map_unmap; + fh->fh_printf = __posix_handle_printf; + fh->fh_read = __posix_handle_read; + fh->fh_size = __posix_handle_size; + fh->fh_sync = __posix_handle_sync; + fh->fh_truncate = __posix_handle_truncate; + fh->fh_write = __posix_handle_write; + + return (0); + +err: if (fd != -1) { + WT_SYSCALL_RETRY(close(fd), tret); + if (tret != 0) + __wt_err(session, tret, "%s: handle-open: close", name); + } + return (ret); +} + +/* + * __wt_os_posix -- + * Initialize a POSIX configuration. 
+ */ +int +__wt_os_posix(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + /* Initialize the POSIX jump table. */ + conn->file_directory_list = __wt_posix_directory_list; + conn->file_directory_sync = __posix_directory_sync; + conn->file_exist = __posix_file_exist; + conn->file_remove = __posix_file_remove; + conn->file_rename = __posix_file_rename; + conn->file_size = __posix_file_size; + conn->handle_open = __posix_handle_open; + + return (0); +} + +/* + * __wt_os_posix_cleanup -- + * Discard a POSIX configuration. + */ +int +__wt_os_posix_cleanup(WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (0); +} diff --git a/src/os_posix/os_fs_posix.c b/src/os_posix/os_fs_posix.c deleted file mode 100644 index 2d0bee7ae05..00000000000 --- a/src/os_posix/os_fs_posix.c +++ /dev/null @@ -1,729 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __posix_sync -- - * Underlying support function to flush a file handle. - */ -static int -__posix_sync(WT_SESSION_IMPL *session, - int fd, const char *name, const char *func, bool block) -{ - WT_DECL_RET; - -#ifdef HAVE_SYNC_FILE_RANGE - if (!block) { - WT_SYSCALL_RETRY(sync_file_range(fd, - (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func); - } -#else - /* - * Callers attempting asynchronous flush handle ENOTSUP returns, and - * won't make further attempts. - */ - if (!block) - return (ENOTSUP); -#endif - -#if defined(F_FULLFSYNC) - /* - * OS X fsync documentation: - * "Note that while fsync() will flush all data from the host to the - * drive (i.e. 
the "permanent storage device"), the drive itself may - * not physically write the data to the platters for quite some time - * and it may be written in an out-of-order sequence. For applications - * that require tighter guarantees about the integrity of their data, - * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks - * the drive to flush all buffered data to permanent storage." - * - * OS X F_FULLFSYNC fcntl documentation: - * "This is currently implemented on HFS, MS-DOS (FAT), and Universal - * Disk Format (UDF) file systems." - */ - WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); - if (ret == 0) - return (0); - /* - * Assume F_FULLFSYNC failed because the file system doesn't support it - * and fallback to fsync. - */ -#endif -#if defined(HAVE_FDATASYNC) - WT_SYSCALL_RETRY(fdatasync(fd), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: %s: fdatasync", name, func); -#else - WT_SYSCALL_RETRY(fsync(fd), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: %s: fsync", name, func); -#endif -} - -/* - * __posix_directory_sync -- - * Flush a directory to ensure file creation is durable. - */ -static int -__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ -#ifdef __linux__ - WT_DECL_RET; - int fd, tret; - const char *dir; - char *copy; - - /* - * POSIX 1003.1 does not require that fsync of a file handle ensures the - * entry in the directory containing the file has also reached disk (and - * there are historic Linux filesystems requiring this), do an explicit - * fsync on a file descriptor for the directory to be sure. - */ - copy = NULL; - if (path == NULL || (dir = strrchr(path, '/')) == NULL) - path = S2C(session)->home; - else { - /* - * Copy the directory name, leaving the trailing slash in place, - * so a path of "/foo" doesn't result in an empty string. 
- */ - WT_RET(__wt_strndup( - session, path, (size_t)(dir - path) + 1, &copy)); - path = copy; - } - - WT_SYSCALL_RETRY(( - (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_RET_MSG(session, ret, "%s: directory-sync: open", path); - - ret = __posix_sync(session, fd, path, "directory-sync", true); - - WT_SYSCALL_RETRY(close(fd), tret); - if (tret != 0) - __wt_err(session, tret, "%s: directory-sync: close", path); - return (ret == 0 ? tret : ret); -#else - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -#endif -} - -/* - * __posix_file_exist -- - * Return if the file exists. - */ -static int -__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) -{ - struct stat sb; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - name = path; - - WT_SYSCALL_RETRY(stat(name, &sb), ret); - if (ret == 0) - *existp = true; - else if (ret == ENOENT) { - *existp = false; - ret = 0; - } else - __wt_err(session, ret, "%s: file-exist: stat", name); - - __wt_free(session, path); - return (ret); -} - -/* - * __posix_file_remove -- - * Remove a file. - */ -static int -__posix_file_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - char *path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, name, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-remove: file has open handles", name); -#endif - - WT_RET(__wt_filename(session, name, &path)); - name = path; - - WT_SYSCALL_RETRY(remove(name), ret); - if (ret != 0) - __wt_err(session, ret, "%s: file-remove: remove", name); - - __wt_free(session, path); - return (ret); -} - -/* - * __posix_file_rename -- - * Rename a file.
- */ -static int -__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_DECL_RET; - char *from_path, *to_path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, from, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", from); - if (__wt_handle_search(session, to, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", to); -#endif - - from_path = to_path = NULL; - WT_ERR(__wt_filename(session, from, &from_path)); - from = from_path; - WT_ERR(__wt_filename(session, to, &to_path)); - to = to_path; - - WT_SYSCALL_RETRY(rename(from, to), ret); - if (ret != 0) - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); - -err: __wt_free(session, from_path); - __wt_free(session, to_path); - return (ret); -} - -/* - * __posix_file_size -- - * Get the size of a file in bytes, by file name. - */ -static int -__posix_file_size( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - name = path; - - /* - * Optionally don't log errors on ENOENT; some callers of this function - * expect failure in that case and don't want an error message logged. - */ - WT_SYSCALL_RETRY(stat(name, &sb), ret); - if (ret == 0) - *sizep = sb.st_size; - else if (ret != ENOENT || !silent) - __wt_err(session, ret, "%s: file-size: stat", name); - - __wt_free(session, path); - - return (ret); -} - -/* - * __posix_handle_advise -- - * POSIX fadvise. - */ -static int -__posix_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) -{ -#if defined(HAVE_POSIX_FADVISE) - WT_DECL_RET; - - /* - * Refuse pre-load when direct I/O is configured for the file, the - * kernel cache isn't interesting. 
- */ - if (advice == POSIX_MADV_WILLNEED && fh->direct_io) - return (ENOTSUP); - - WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); - if (ret == 0) - return (0); - - /* - * Treat EINVAL as not-supported, some systems don't support some flags. - * Quietly fail, callers expect not-supported failures. - */ - if (ret == EINVAL) - return (ENOTSUP); - - WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name); -#else - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - - /* Quietly fail, callers expect not-supported failures. */ - return (ENOTSUP); -#endif -} - -/* - * __posix_handle_close -- - * ANSI C close/fclose. - */ -static int -__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - - if (fh->fp == NULL) { - WT_SYSCALL_RETRY(close(fh->fd), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name); - } - - /* If the stream was opened for writing, flush the file. */ - if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, ret, "%s: handle-close: fflush", fh->name); - } - - /* Close the file. */ - if (fclose(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, ret, "%s: handle-close: fclose", fh->name); - } - return (ret); -} - -/* - * __posix_handle_getc -- - * ANSI C fgetc. - */ -static int -__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, - ENOTSUP, "%s: handle-getc: no stream configured", fh->name); - - *chp = fgetc(fh->fp); - if (*chp != EOF || !ferror(fh->fp)) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); -} - -/* - * __posix_handle_lock -- - * Lock/unlock a file. 
- */ -static int -__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) -{ - struct flock fl; - WT_DECL_RET; - - /* - * WiredTiger requires this function be able to acquire locks past - * the end of file. - * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - */ - fl.l_start = 0; - fl.l_len = 1; - fl.l_type = lock ? F_WRLCK : F_UNLCK; - fl.l_whence = SEEK_SET; - - WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name); -} - -/* - * __posix_handle_printf -- - * ANSI C vfprintf. - */ -static int -__posix_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, ENOTSUP, - "%s: vfprintf: no stream configured", fh->name); - - if (vfprintf(fh->fp, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); -} - -/* - * __posix_handle_read -- - * POSIX pread. - */ -static int -__posix_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - size_t chunk; - ssize_t nr; - uint8_t *addr; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) - WT_RET_MSG(session, nr == 0 ? 
WT_ERROR : __wt_errno(), - "%s: handle-read: pread: failed to read %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __posix_handle_size -- - * Get the size of a file in bytes, by file handle. - */ -static int -__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - - WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); - if (ret == 0) { - *sizep = sb.st_size; - return (0); - } - WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name); -} - -/* - * __posix_handle_sync -- - * POSIX fflush/fsync. - */ -static int -__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) -{ - if (fh->fp == NULL) - return (__posix_sync( - session, fh->fd, fh->name, "handle-sync", block)); - - if (fflush(fh->fp) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); -} - -/* - * __posix_handle_truncate -- - * POSIX ftruncate. - */ -static int -__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - - WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name); -} - -/* - * __posix_handle_write -- - * POSIX pwrite. - */ -static int -__posix_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - size_t chunk; - ssize_t nw; - const uint8_t *addr; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. 
*/ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) - WT_RET_MSG(session, __wt_errno(), - "%s: handle-write: pwrite: failed to write %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __posix_handle_open_cloexec -- - * Prevent child access to file handles. - */ -static inline int -__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) -{ -#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) - int f; - - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. There's an obvious race - * between the open and this call, prefer the flag to open if available. - */ - if ((f = fcntl(fd, F_GETFD)) == -1 || - fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) - WT_RET_MSG(session, __wt_errno(), - "%s: handle-open: fcntl", name); - return (0); -#else - WT_UNUSED(session); - WT_UNUSED(fd); - WT_UNUSED(name); - return (0); -#endif -} - -/* - * __posix_handle_open -- - * Open a file handle. - */ -static int -__posix_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - mode_t mode; - int f, fd, tret; - bool direct_io; - const char *stream_mode; - - conn = S2C(session); - direct_io = false; - - /* Set up error handling. */ - fh->fd = fd = -1; - fh->fp = NULL; - - if (file_type == WT_FILE_TYPE_DIRECTORY) { - f = O_RDONLY; -#ifdef O_CLOEXEC - /* - * Security: - * The application may spawn a new process, and we don't want - * another process to have access to our file handles. - */ - f |= O_CLOEXEC; -#endif - WT_SYSCALL_RETRY(( - (fd = open(name, f, 0444)) == -1 ? 
1 : 0), ret); - if (ret != 0) - WT_ERR_MSG(session, ret, "%s: handle-open: open", name); - WT_ERR(__posix_handle_open_cloexec(session, fd, name)); - goto directory_open; - } - - f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR; - if (LF_ISSET(WT_OPEN_CREATE)) { - f |= O_CREAT; - if (LF_ISSET(WT_OPEN_EXCLUSIVE)) - f |= O_EXCL; - mode = 0666; - } else - mode = 0; - -#ifdef O_BINARY - /* Windows clones: we always want to treat the file as a binary. */ - f |= O_BINARY; -#endif -#ifdef O_CLOEXEC - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. - */ - f |= O_CLOEXEC; -#endif -#ifdef O_DIRECT - /* - * Direct I/O: file-type is a flag from the set of possible flags stored - * in the connection handle during configuration, check for a match. - * Also, "direct_io=checkpoint" configures direct I/O for readonly data - * files. - */ - if (FLD_ISSET(conn->direct_io, file_type) || - (LF_ISSET(WT_OPEN_READONLY) && - file_type == WT_FILE_TYPE_DATA && - FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { - f |= O_DIRECT; - direct_io = true; - } -#endif - fh->direct_io = direct_io; -#ifdef O_NOATIME - /* Avoid updating metadata for read-only workloads. */ - if (file_type == WT_FILE_TYPE_DATA) - f |= O_NOATIME; -#endif - - if (file_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { -#ifdef O_DSYNC - f |= O_DSYNC; -#elif defined(O_SYNC) - f |= O_SYNC; -#else - WT_ERR_MSG(session, ENOTSUP, - "unsupported log sync mode configured"); -#endif - } - - WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_ERR_MSG(session, ret, - direct_io ? - "%s: handle-open: open: failed with direct I/O configured, " - "some filesystem types do not support direct I/O" : - "%s: handle-open: open", name); - WT_ERR(__posix_handle_open_cloexec(session, fd, name)); - - /* Disable read-ahead on trees: it slows down random read workloads. 
*/ -#if defined(HAVE_POSIX_FADVISE) - if (file_type == WT_FILE_TYPE_DATA) { - WT_SYSCALL_RETRY( - posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); - if (ret != 0) - WT_ERR_MSG(session, ret, - "%s: handle-open: posix_fadvise", name); - } -#endif - - /* Optionally configure a stdio stream API. */ - switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { - case WT_STREAM_APPEND: - stream_mode = "a"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case WT_STREAM_READ: - stream_mode = "r"; - break; - case WT_STREAM_WRITE: - stream_mode = "w"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case 0: - default: - stream_mode = NULL; - break; - } - if (stream_mode != NULL) { - if ((fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: fdopen", name); - if (LF_ISSET(WT_STREAM_LINE_BUFFER)) - __wt_stream_set_line_buffer(fh->fp); - } - -directory_open: - fh->fd = fd; - - /* Configure fallocate calls. */ - __wt_posix_handle_allocate_configure(session, fh); - - fh->fh_advise = __posix_handle_advise; - fh->fh_allocate = __wt_posix_handle_allocate; - fh->fh_close = __posix_handle_close; - fh->fh_getc = __posix_handle_getc; - fh->fh_lock = __posix_handle_lock; - fh->fh_map = __wt_posix_map; - fh->fh_map_discard = __wt_posix_map_discard; - fh->fh_map_preload = __wt_posix_map_preload; - fh->fh_map_unmap = __wt_posix_map_unmap; - fh->fh_printf = __posix_handle_printf; - fh->fh_read = __posix_handle_read; - fh->fh_size = __posix_handle_size; - fh->fh_sync = __posix_handle_sync; - fh->fh_truncate = __posix_handle_truncate; - fh->fh_write = __posix_handle_write; - - return (0); - -err: if (fd != -1) { - WT_SYSCALL_RETRY(close(fd), tret); - if (tret != 0) - __wt_err(session, tret, "%s: handle-open: close", name); - } - return (ret); -} - -/* - * __wt_os_posix -- - * Initialize a POSIX configuration. 
- */ -int -__wt_os_posix(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - - conn = S2C(session); - - /* Initialize the POSIX jump table. */ - conn->file_directory_list = __wt_posix_directory_list; - conn->file_directory_sync = __posix_directory_sync; - conn->file_exist = __posix_file_exist; - conn->file_remove = __posix_file_remove; - conn->file_rename = __posix_file_rename; - conn->file_size = __posix_file_size; - conn->handle_open = __posix_handle_open; - - return (0); -} - -/* - * __wt_os_posix_cleanup -- - * Discard a POSIX configuration. - */ -int -__wt_os_posix_cleanup(WT_SESSION_IMPL *session) -{ - WT_UNUSED(session); - - return (0); -} diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c new file mode 100644 index 00000000000..f68c3cfdfe7 --- /dev/null +++ b/src/os_win/os_fs.c @@ -0,0 +1,695 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __win_directory_sync -- + * Flush a directory to ensure a file creation is durable. + */ +static int +__win_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +} + +/* + * __win_file_exist -- + * Return if the file exists. + */ +static int +__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + + ret = GetFileAttributesA(path); + + __wt_free(session, path); + + if (ret != INVALID_FILE_ATTRIBUTES) + *existp = true; + else + *existp = false; + + return (0); +} + +/* + * __win_file_remove -- + * Remove a file. 
+ */ +static int +__win_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + char *path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, name, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + if (DeleteFileA(name) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, "%s: file-remove: DeleteFileA", name); + } + + __wt_free(session, path); + return (ret); +} + +/* + * __win_file_rename -- + * Rename a file. + */ +static int +__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + char *from_path, *to_path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, from, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); + if (__wt_handle_search(session, to, false, true, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + from = from_path; + WT_ERR(__wt_filename(session, to, &to_path)); + to = to_path; + + /* + * Check if the target file exists, since Windows does not overwrite + * an existing file on rename. + */ + if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) + if (DeleteFileA(to) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + } + + if (ret == 0 && MoveFileA(from, to) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + } + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); +} + +/* + * __win_file_size -- + * Get the size of a file in bytes, by file name.
+ */ +static int +__win_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + WIN32_FILE_ATTRIBUTE_DATA data; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + + ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); + + __wt_free(session, path); + + if (ret != 0) { + *sizep = + ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; + return (0); + } + + /* + * Some callers of this function expect failure if the file doesn't + * exist, and don't want an error message logged. + */ + ret = __wt_win32_errno(); + if (!silent) + WT_RET_MSG(session, ret, + "%s: file-size: GetFileAttributesEx", name); + return (ret); +} + +/* + * __win_handle_advise -- + * MSVC fadvise. + */ +static int +__win_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + + /* Quietly fail, callers expect not-supported failures. */ + return (ENOTSUP); +} + +/* + * __win_handle_allocate_configure -- + * Configure fallocate behavior for a file handle. + */ +static void +__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_UNUSED(session); + + /* + * fallocate on Windows would be implemented using SetEndOfFile, which + * can also truncate the file. WiredTiger expects fallocate to ignore + * requests to truncate the file which Windows does not do, so we don't + * support the call. + */ + fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; + fh->fallocate_requires_locking = false; +} + +/* + * __win_handle_allocate -- + * Allocate space for a file handle. 
+ */ +static int +__win_handle_allocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + + WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); + return (ENOTSUP); +} + +/* + * __win_handle_close -- + * Close a file handle. + */ +static int +__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_DECL_RET; + + if (fh->fp == NULL) { + /* + * We don't open Windows system handles when opening directories + * for flushing, as it is not necessary (or possible) to flush + * a directory on Windows. Confirm the file handle is set before + * attempting to close it. + */ + if (fh->filehandle != INVALID_HANDLE_VALUE && + CloseHandle(fh->filehandle) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-close: CloseHandle", fh->name); + } + } else { + /* If the stream was opened for writing, flush the file. */ + if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, + ret, "%s: handle-close: fflush", fh->name); + } + + /* Close the file, closing all the underlying handles. */ + if (fclose(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, + ret, "%s: handle-close: fclose", fh->name); + } + } + + /* Close the secondary handle. */ + if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && + CloseHandle(fh->filehandle_secondary) == 0) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-close: secondary: CloseHandle", fh->name); + } + return (ret); +} + +/* + * __win_handle_getc -- + * ANSI C fgetc. 
+ */ +static int +__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, + ENOTSUP, "%s: handle-getc: no stream configured", fh->name); + + *chp = fgetc(fh->fp); + if (*chp != EOF || !ferror(fh->fp)) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); +} + +/* + * __win_handle_lock -- + * Lock/unlock a file. + */ +static int +__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + WT_DECL_RET; + + /* + * WiredTiger requires this function be able to acquire locks past + * the end of file. + * + * Note we're using fcntl(2) locking: all fcntl locks associated with a + * file for a given process are removed when any file descriptor for the + * file is closed by the process, even if a lock was never requested for + * that file descriptor. + * + * http://msdn.microsoft.com/ + * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx + * + * You can lock bytes that are beyond the end of the current file. + * This is useful to coordinate adding records to the end of a file. + */ + if (lock) { + if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-lock: LockFile", fh->name); + } + } else + if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { + ret = __wt_win32_errno(); + __wt_err(session, ret, + "%s: handle-lock: UnlockFile", fh->name); + } + return (ret); +} + +/* + * __win_handle_printf -- + * ANSI C vfprintf. + */ +static int +__win_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, ENOTSUP, + "%s: vfprintf: no stream configured", fh->name); + + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); +} + +/* + * __win_handle_read -- + * Read a chunk. 
+ */ +static int +__win_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + DWORD chunk, nr; + uint8_t *addr; + OVERLAPPED overlapped = { 0 }; + + nr = 0; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); + overlapped.Offset = UINT32_MAX & offset; + overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); + + if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) + WT_RET_MSG(session, + nr == 0 ? WT_ERROR : __wt_win32_errno(), + "%s: handle-read: ReadFile: failed to read %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __win_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + LARGE_INTEGER size; + + if (GetFileSizeEx(fh->filehandle, &size) != 0) { + *sizep = size.QuadPart; + return (0); + } + + WT_RET_MSG(session, + __wt_win32_errno(), "%s: handle-size: GetFileSizeEx", fh->name); +} + +/* + * __win_handle_sync -- + * MSVC fflush/fsync. + */ +static int +__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + WT_DECL_RET; + + if (fh->fp == NULL) { + /* + * Callers attempting asynchronous flush handle ENOTSUP returns, + * and won't make further attempts. 
+ */ + if (!block) + return (ENOTSUP); + + if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) + WT_RET_MSG(session, __wt_win32_errno(), + "%s handle-sync: FlushFileBuffers error", fh->name); + return (0); + } + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); +} + +/* + * __win_handle_truncate -- + * Truncate a file. + */ +static int +__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + LARGE_INTEGER largeint; + + largeint.QuadPart = len; + + if (fh->filehandle_secondary == INVALID_HANDLE_VALUE) + WT_RET_MSG(session, EINVAL, + "%s: handle-truncate: read-only", fh->name); + + if (SetFilePointerEx( + fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) + WT_RET_MSG(session, __wt_win32_errno(), + "%s: handle-truncate: SetFilePointerEx", fh->name); + + if (SetEndOfFile(fh->filehandle_secondary) == FALSE) { + if (GetLastError() == ERROR_USER_MAPPED_FILE) + return (EBUSY); + WT_RET_MSG(session, __wt_win32_errno(), + "%s: handle-truncate: SetEndOfFile error", fh->name); + } + return (0); +} + +/* + * __win_handle_write -- + * Write a chunk. + */ +static int +__win_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + DWORD chunk; + DWORD nw; + const uint8_t *addr; + OVERLAPPED overlapped = { 0 }; + + nw = 0; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break writes larger than 1GB into 1GB chunks. 
*/ + for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { + chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); + overlapped.Offset = UINT32_MAX & offset; + overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); + + if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) + WT_RET_MSG(session, __wt_win32_errno(), + "%s: handle-write: WriteFile: failed to write %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __win_handle_open -- + * Open a file handle. + */ +static int +__win_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) +{ + DWORD dwCreationDisposition; + HANDLE filehandle, filehandle_secondary; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + int f, fd, share_mode; + bool direct_io; + const char *stream_mode; + + conn = S2C(session); + direct_io = false; + + /* Set up error handling. */ + fh->filehandle = fh->filehandle_secondary = + filehandle = filehandle_secondary = INVALID_HANDLE_VALUE; + fh->fp = NULL; + + /* + * Opening a file handle on a directory is only to support filesystems + * that require a directory sync for durability, and Windows doesn't + * require that functionality: create an empty WT_FH structure with + * invalid handles. + */ + if (file_type == WT_FILE_TYPE_DIRECTORY) + goto directory_open; + + share_mode = FILE_SHARE_READ; + if (!LF_ISSET(WT_OPEN_READONLY)) + share_mode |= FILE_SHARE_WRITE; + + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. 
+ * + * TODO: Set tighter file permissions but set bInheritHandle to false + * to prevent inheritance + */ + f = FILE_ATTRIBUTE_NORMAL; + + dwCreationDisposition = 0; + if (LF_ISSET(WT_OPEN_CREATE)) { + dwCreationDisposition = CREATE_NEW; + if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + dwCreationDisposition = CREATE_ALWAYS; + } else + dwCreationDisposition = OPEN_EXISTING; + + /* + * direct_io means no OS file caching. This requires aligned buffer + * allocations like O_DIRECT. + */ + if (FLD_ISSET(conn->direct_io, file_type) || + (LF_ISSET(WT_OPEN_READONLY) && + file_type == WT_FILE_TYPE_DATA && + FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { + f |= FILE_FLAG_NO_BUFFERING; + direct_io = true; + } + fh->direct_io = direct_io; + + /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ + if (FLD_ISSET(conn->write_through, file_type)) + f |= FILE_FLAG_WRITE_THROUGH; + + if (file_type == WT_FILE_TYPE_LOG && + FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) + f |= FILE_FLAG_WRITE_THROUGH; + + /* Disable read-ahead on trees: it slows down random read workloads. */ + if (file_type == WT_FILE_TYPE_DATA) + f |= FILE_FLAG_RANDOM_ACCESS; + + filehandle = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, + share_mode, NULL, dwCreationDisposition, f, NULL); + if (filehandle == INVALID_HANDLE_VALUE) { + if (LF_ISSET(WT_OPEN_CREATE) && + GetLastError() == ERROR_FILE_EXISTS) + filehandle = CreateFileA( + name, GENERIC_READ | GENERIC_WRITE, share_mode, + NULL, OPEN_EXISTING, f, NULL); + if (filehandle == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_win32_errno(), + direct_io ? + "%s: handle-open: CreateFileA: failed with direct " + "I/O configured, some filesystem types do not " + "support direct I/O" : + "%s: handle-open: CreateFileA", name); + } + + /* + * Open a second handle to file to support allocation/truncation + * concurrently with reads on the file. Writes would also move the file + * pointer. 
+ */ + if (!LF_ISSET(WT_OPEN_READONLY)) { + filehandle_secondary = CreateFileA(name, + GENERIC_READ | GENERIC_WRITE, + share_mode, NULL, OPEN_EXISTING, f, NULL); + if (filehandle_secondary == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_win32_errno(), + "%s: handle-open: CreateFileA: secondary", name); + } + + /* Optionally configure a stdio stream API. */ + switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { + case WT_STREAM_APPEND: + f = _O_APPEND | _O_TEXT; + stream_mode = "a"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case WT_STREAM_READ: + f = _O_RDONLY | _O_TEXT; + stream_mode = "r"; + break; + case WT_STREAM_WRITE: + f = _O_TEXT; + stream_mode = "w"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case 0: + default: + stream_mode = NULL; + break; + } + if (stream_mode != NULL) { + if ((fd = _open_osfhandle((intptr_t)filehandle, f)) == -1) + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: _open_osfhandle", name); + if ((fh->fp = fdopen(fd, stream_mode)) == NULL) + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fdopen", name); + if (LF_ISSET(WT_STREAM_LINE_BUFFER)) + __wt_stream_set_line_buffer(fh->fp); + } + + /* Configure fallocate/posix_fallocate calls. 
*/ + __win_handle_allocate_configure(session, fh); + +directory_open: + fh->filehandle = filehandle; + fh->filehandle_secondary = filehandle_secondary; + + fh->fh_advise = __win_handle_advise; + fh->fh_allocate = __win_handle_allocate; + fh->fh_close = __win_handle_close; + fh->fh_getc = __win_handle_getc; + fh->fh_lock = __win_handle_lock; + fh->fh_map = __wt_win_map; + fh->fh_map_discard = __wt_win_map_discard; + fh->fh_map_preload = __wt_win_map_preload; + fh->fh_map_unmap = __wt_win_map_unmap; + fh->fh_printf = __win_handle_printf; + fh->fh_read = __win_handle_read; + fh->fh_size = __win_handle_size; + fh->fh_sync = __win_handle_sync; + fh->fh_truncate = __win_handle_truncate; + fh->fh_write = __win_handle_write; + + return (0); + +err: if (filehandle != INVALID_HANDLE_VALUE) + (void)CloseHandle(filehandle); + if (filehandle_secondary != INVALID_HANDLE_VALUE) + (void)CloseHandle(filehandle_secondary); + + return (ret); +} + +/* + * __wt_os_win -- + * Initialize a MSVC configuration. + */ +int +__wt_os_win(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + /* Initialize the POSIX jump table. */ + conn->file_directory_list = __wt_win_directory_list; + conn->file_directory_sync = __win_directory_sync; + conn->file_exist = __win_file_exist; + conn->file_remove = __win_file_remove; + conn->file_rename = __win_file_rename; + conn->file_size = __win_file_size; + conn->handle_open = __win_handle_open; + + return (0); +} + +/* + * __wt_os_win_cleanup -- + * Discard a POSIX configuration. + */ +int +__wt_os_win_cleanup(WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (0); +} diff --git a/src/os_win/os_fs_win.c b/src/os_win/os_fs_win.c deleted file mode 100644 index e7f67045572..00000000000 --- a/src/os_win/os_fs_win.c +++ /dev/null @@ -1,695 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. 
- */ - -#include "wt_internal.h" - -/* - * __win_directory_sync -- - * Flush a directory to ensure a file creation is durable. - */ -static int -__win_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -} - -/* - * __win_file_exist -- - * Return if the file exists. - */ -static int -__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) -{ - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - - ret = GetFileAttributesA(path); - - __wt_free(session, path); - - if (ret != INVALID_FILE_ATTRIBUTES) - *existp = true; - else - *existp = false; - - return (0); -} - -/* - * __win_file_remove -- - * Remove a file. - */ -static int -__win_file_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - char *path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, name, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-remove: file has open handles", name); -#endif - - WT_RET(__wt_filename(session, name, &path)); - name = path; - - if (DeleteFileA(name) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, "%s: file-remove: DeleteFileA", name); - } - - __wt_free(session, path); - return (ret); -} - -/* - * __win_file_rename -- - * Rename a file. 
- */ -static int -__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_DECL_RET; - char *from_path, *to_path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, from, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", from); - if (__wt_handle_search(session, to, false, true, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", to); -#endif - - from_path = to_path = NULL; - WT_ERR(__wt_filename(session, from, &from_path)); - from = from_path; - WT_ERR(__wt_filename(session, to, &to_path)); - to = to_path; - - /* - * Check if file exists since Windows does not override the file if - * it exists. - */ - if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) - if (DeleteFileA(to) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); - } - - if (ret == 0 && MoveFileA(from, to) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); - } - -err: __wt_free(session, from_path); - __wt_free(session, to_path); - return (ret); -} - -/* - * __win_file_size -- - * Get the size of a file in bytes, by file name. - */ -static int -__win_file_size( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) -{ - WIN32_FILE_ATTRIBUTE_DATA data; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - - ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); - - __wt_free(session, path); - - if (ret != 0) { - *sizep = - ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; - return (0); - } - - /* - * Some callers of this function expect failure if the file doesn't - * exist, and don't want an error message logged. - */ - ret = __wt_win32_errno(); - if (!silent) - WT_RET_MSG(session, ret, - "%s: file-size: GetFileAttributesEx", name); - return (ret); -} - -/* - * __win_handle_advise -- - * MSVC fadvise. 
- */ -static int -__win_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - - /* Quietly fail, callers expect not-supported failures. */ - return (ENOTSUP); -} - -/* - * __win_handle_allocate_configure -- - * Configure fallocate behavior for a file handle. - */ -static void -__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_UNUSED(session); - - /* - * fallocate on Windows would be implemented using SetEndOfFile, which - * can also truncate the file. WiredTiger expects fallocate to ignore - * requests to truncate the file which Windows does not do, so we don't - * support the call. - */ - fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; - fh->fallocate_requires_locking = false; -} - -/* - * __win_handle_allocate -- - * Allocate space for a file handle. - */ -static int -__win_handle_allocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - - WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); - return (ENOTSUP); -} - -/* - * __win_handle_close -- - * Close a file handle. - */ -static int -__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - - if (fh->fp == NULL) { - /* - * We don't open Windows system handles when opening directories - * for flushing, since it is not necessary (or possible) to flush - * a directory on Windows. Confirm the file handle is set before - * attempting to close it. - */ - if (fh->filehandle != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle) == 0) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-close: CloseHandle", fh->name); - } - } else { - /* If the stream was opened for writing, flush the file. 
*/ - if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, - ret, "%s: handle-close: fflush", fh->name); - } - - /* Close the file, closing all the underlying handles. */ - if (fclose(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, - ret, "%s: handle-close: fclose", fh->name); - } - } - - /* Close the secondary handle. */ - if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle_secondary) == 0) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-close: secondary: CloseHandle", fh->name); - } - return (ret); -} - -/* - * __win_handle_getc -- - * ANSI C fgetc. - */ -static int -__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, - ENOTSUP, "%s: handle-getc: no stream configured", fh->name); - - *chp = fgetc(fh->fp); - if (*chp != EOF || !ferror(fh->fp)) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); -} - -/* - * __win_handle_lock -- - * Lock/unlock a file. - */ -static int -__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) -{ - WT_DECL_RET; - - /* - * WiredTiger requires this function be able to acquire locks past - * the end of file. - * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - * - * http://msdn.microsoft.com/ - * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx - * - * You can lock bytes that are beyond the end of the current file. - * This is useful to coordinate adding records to the end of a file. 
- */ - if (lock) { - if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-lock: LockFile", fh->name); - } - } else - if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { - ret = __wt_win32_errno(); - __wt_err(session, ret, - "%s: handle-lock: UnlockFile", fh->name); - } - return (ret); -} - -/* - * __win_handle_printf -- - * ANSI C vfprintf. - */ -static int -__win_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, ENOTSUP, - "%s: vfprintf: no stream configured", fh->name); - - if (vfprintf(fh->fp, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); -} - -/* - * __win_handle_read -- - * Read a chunk. - */ -static int -__win_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - DWORD chunk, nr; - uint8_t *addr; - OVERLAPPED overlapped = { 0 }; - - nr = 0; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); - overlapped.Offset = UINT32_MAX & offset; - overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - - if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) - WT_RET_MSG(session, - nr == 0 ? WT_ERROR : __wt_win32_errno(), - "%s: handle-read: ReadFile: failed to read %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __win_handle_size -- - * Get the size of a file in bytes, by file handle. 
- */ -static int -__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - LARGE_INTEGER size; - - if (GetFileSizeEx(fh->filehandle, &size) != 0) { - *sizep = size.QuadPart; - return (0); - } - - WT_RET_MSG(session, - __wt_win32_errno(), "%s: handle-size: GetFileSizeEx", fh->name); -} - -/* - * __win_handle_sync -- - * MSVC fflush/fsync. - */ -static int -__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) -{ - WT_DECL_RET; - - if (fh->fp == NULL) { - /* - * Callers attempting asynchronous flush handle ENOTSUP returns, - * and won't make further attempts. - */ - if (!block) - return (ENOTSUP); - - if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) - WT_RET_MSG(session, __wt_win32_errno(), - "%s handle-sync: FlushFileBuffers error", fh->name); - return (0); - } - - if (fflush(fh->fp) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); -} - -/* - * __win_handle_truncate -- - * Truncate a file. - */ -static int -__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - LARGE_INTEGER largeint; - - largeint.QuadPart = len; - - if (fh->filehandle_secondary == INVALID_HANDLE_VALUE) - WT_RET_MSG(session, EINVAL, - "%s: handle-truncate: read-only", fh->name); - - if (SetFilePointerEx( - fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) - WT_RET_MSG(session, __wt_win32_errno(), - "%s: handle-truncate: SetFilePointerEx", fh->name); - - if (SetEndOfFile(fh->filehandle_secondary) == FALSE) { - if (GetLastError() == ERROR_USER_MAPPED_FILE) - return (EBUSY); - WT_RET_MSG(session, __wt_win32_errno(), - "%s: handle-truncate: SetEndOfFile error", fh->name); - } - return (0); -} - -/* - * __win_handle_write -- - * Write a chunk. 
- */ -static int -__win_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - DWORD chunk; - DWORD nw; - const uint8_t *addr; - OVERLAPPED overlapped = { 0 }; - - nw = 0; - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); - overlapped.Offset = UINT32_MAX & offset; - overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - - if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) - WT_RET_MSG(session, __wt_win32_errno(), - "%s: handle-write: WriteFile: failed to write %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __win_handle_open -- - * Open a file handle. - */ -static int -__win_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) -{ - DWORD dwCreationDisposition; - HANDLE filehandle, filehandle_secondary; - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - int f, fd, share_mode; - bool direct_io; - const char *stream_mode; - - conn = S2C(session); - direct_io = false; - - /* Set up error handling. */ - fh->filehandle = fh->filehandle_secondary = - filehandle = filehandle_secondary = INVALID_HANDLE_VALUE; - fh->fp = NULL; - - /* - * Opening a file handle on a directory is only to support filesystems - * that require a directory sync for durability, and Windows doesn't - * require that functionality: create an empty WT_FH structure with - * invalid handles. 
- */ - if (file_type == WT_FILE_TYPE_DIRECTORY) - goto directory_open; - - share_mode = FILE_SHARE_READ; - if (!LF_ISSET(WT_OPEN_READONLY)) - share_mode |= FILE_SHARE_WRITE; - - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. - * - * TODO: Set tighter file permissions but set bInheritHandle to false - * to prevent inheritance - */ - f = FILE_ATTRIBUTE_NORMAL; - - dwCreationDisposition = 0; - if (LF_ISSET(WT_OPEN_CREATE)) { - dwCreationDisposition = CREATE_NEW; - if (LF_ISSET(WT_OPEN_EXCLUSIVE)) - dwCreationDisposition = CREATE_ALWAYS; - } else - dwCreationDisposition = OPEN_EXISTING; - - /* - * direct_io means no OS file caching. This requires aligned buffer - * allocations like O_DIRECT. - */ - if (FLD_ISSET(conn->direct_io, file_type) || - (LF_ISSET(WT_OPEN_READONLY) && - file_type == WT_FILE_TYPE_DATA && - FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { - f |= FILE_FLAG_NO_BUFFERING; - direct_io = true; - } - fh->direct_io = direct_io; - - /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ - if (FLD_ISSET(conn->write_through, file_type)) - f |= FILE_FLAG_WRITE_THROUGH; - - if (file_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) - f |= FILE_FLAG_WRITE_THROUGH; - - /* Disable read-ahead on trees: it slows down random read workloads. */ - if (file_type == WT_FILE_TYPE_DATA) - f |= FILE_FLAG_RANDOM_ACCESS; - - filehandle = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, - share_mode, NULL, dwCreationDisposition, f, NULL); - if (filehandle == INVALID_HANDLE_VALUE) { - if (LF_ISSET(WT_OPEN_CREATE) && - GetLastError() == ERROR_FILE_EXISTS) - filehandle = CreateFileA( - name, GENERIC_READ | GENERIC_WRITE, share_mode, - NULL, OPEN_EXISTING, f, NULL); - if (filehandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_win32_errno(), - direct_io ? 
- "%s: handle-open: CreateFileA: failed with direct " - "I/O configured, some filesystem types do not " - "support direct I/O" : - "%s: handle-open: CreateFileA", name); - } - - /* - * Open a second handle to file to support allocation/truncation - * concurrently with reads on the file. Writes would also move the file - * pointer. - */ - if (!LF_ISSET(WT_OPEN_READONLY)) { - filehandle_secondary = CreateFileA(name, - GENERIC_READ | GENERIC_WRITE, - share_mode, NULL, OPEN_EXISTING, f, NULL); - if (filehandle_secondary == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_win32_errno(), - "%s: handle-open: CreateFileA: secondary", name); - } - - /* Optionally configure a stdio stream API. */ - switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { - case WT_STREAM_APPEND: - f = _O_APPEND | _O_TEXT; - stream_mode = "a"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case WT_STREAM_READ: - f = _O_RDONLY | _O_TEXT; - stream_mode = "r"; - break; - case WT_STREAM_WRITE: - f = _O_TEXT; - stream_mode = "w"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case 0: - default: - stream_mode = NULL; - break; - } - if (stream_mode != NULL) { - if ((fd = _open_osfhandle((intptr_t)filehandle, f)) == -1) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: _open_osfhandle", name); - if ((fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: fdopen", name); - if (LF_ISSET(WT_STREAM_LINE_BUFFER)) - __wt_stream_set_line_buffer(fh->fp); - } - - /* Configure fallocate/posix_fallocate calls. 
*/ - __win_handle_allocate_configure(session, fh); - -directory_open: - fh->filehandle = filehandle; - fh->filehandle_secondary = filehandle_secondary; - - fh->fh_advise = __win_handle_advise; - fh->fh_allocate = __win_handle_allocate; - fh->fh_close = __win_handle_close; - fh->fh_getc = __win_handle_getc; - fh->fh_lock = __win_handle_lock; - fh->fh_map = __wt_win_map; - fh->fh_map_discard = __wt_win_map_discard; - fh->fh_map_preload = __wt_win_map_preload; - fh->fh_map_unmap = __wt_win_map_unmap; - fh->fh_printf = __win_handle_printf; - fh->fh_read = __win_handle_read; - fh->fh_size = __win_handle_size; - fh->fh_sync = __win_handle_sync; - fh->fh_truncate = __win_handle_truncate; - fh->fh_write = __win_handle_write; - - return (0); - -err: if (filehandle != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle); - if (filehandle_secondary != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle_secondary); - - return (ret); -} - -/* - * __wt_os_win -- - * Initialize a MSVC configuration. - */ -int -__wt_os_win(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - - conn = S2C(session); - - /* Initialize the POSIX jump table. */ - conn->file_directory_list = __wt_win_directory_list; - conn->file_directory_sync = __win_directory_sync; - conn->file_exist = __win_file_exist; - conn->file_remove = __win_file_remove; - conn->file_rename = __win_file_rename; - conn->file_size = __win_file_size; - conn->handle_open = __win_handle_open; - - return (0); -} - -/* - * __wt_os_win_cleanup -- - * Discard a POSIX configuration. - */ -int -__wt_os_win_cleanup(WT_SESSION_IMPL *session) -{ - WT_UNUSED(session); - - return (0); -} -- cgit v1.2.1 From e1f4765e7110802991b349799223d3efd29f4a70 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 13:13:45 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Prettify the verbose output when a handle is opened. 
--- src/os_common/os_fhandle.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c index 9013d67d22b..eb7323c3e26 100644 --- a/src/os_common/os_fhandle.c +++ b/src/os_common/os_fhandle.c @@ -119,14 +119,13 @@ __open_verbose(WT_SESSION_IMPL *session, break; } - sep = ""; WT_RET(__wt_scr_alloc(session, 0, &tmp)); - + sep = " ("; #define WT_OPEN_VERBOSE_FLAG(f, name) \ if (LF_ISSET(f)) { \ WT_ERR(__wt_buf_catfmt( \ session, tmp, "%s%s", sep, name)); \ - sep = ","; \ + sep = ", "; \ } WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create"); @@ -137,9 +136,12 @@ __open_verbose(WT_SESSION_IMPL *session, WT_OPEN_VERBOSE_FLAG(WT_STREAM_READ, "stream-read"); WT_OPEN_VERBOSE_FLAG(WT_STREAM_WRITE, "stream-write"); + if (tmp->size != 0) + WT_ERR(__wt_buf_catfmt(session, tmp, ")")); + ret = __wt_verbose(session, WT_VERB_FILEOPS, - "%s: handle-open: type %s, flags %s", - name, file_type_tag, (char *)tmp->data); + "%s: handle-open: type %s%s", + name, file_type_tag, tmp->size == 0 ? "" : (char *)tmp->data); err: __wt_scr_free(session, &tmp); return (ret); -- cgit v1.2.1 From fb91730bccf4bcc3eb6ace51100e833c5d136c4f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 13:33:22 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files CreateFileMappingA returns a NULL on error, not INVALID_HANDLE_VALUE. 
--- src/os_win/os_map.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index feee2899937..a46c78bedb9 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -33,7 +33,7 @@ __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) fh->maphandle = CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); - if (fh->maphandle == INVALID_HANDLE_VALUE) + if (fh->maphandle == NULL) WT_RET_MSG(session, __wt_win32_errno(), "%s: memory-map: CreateFileMappingA", fh->name); @@ -42,7 +42,7 @@ __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) ret = __wt_win32_errno(); (void)CloseHandle(fh->maphandle); - fh->maphandle = INVALID_HANDLE_VALUE; + fh->maphandle = NULL; WT_RET_MSG(session, ret, "%s: memory-map: MapViewOfFile", fh->name); @@ -96,6 +96,9 @@ __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) (void)__wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); + if (fh->maphandle == NULL) + return (0); + if (UnmapViewOfFile(map) == 0) { ret = __wt_win32_errno(); __wt_err(session, ret, @@ -108,7 +111,7 @@ __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) "%s: memory-unmap: CloseHandle", fh->name); } - fh->maphandle = INVALID_HANDLE_VALUE; + fh->maphandle = NULL; return (ret); } -- cgit v1.2.1 From 3b74ed994c00f85bf54de33de9772ddd996a61b1 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 13:34:27 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Do verbose configuration early, with error configuration, so we output all the messages the application wants. 
--- src/conn/conn_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index e5e18c8eaed..9e2f03da21f 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -2089,6 +2089,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, * The error message configuration might have changed (if set in a * configuration file, and not in the application's configuration * string), get it again. Do it first, make error messages correct. + * Ditto verbose configuration so we dump everything the application + * wants to see. */ WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); if (cval.len != 0) { @@ -2096,6 +2098,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_strndup( session, cval.str, cval.len, &conn->error_prefix)); } + WT_ERR(__wt_verbose_config(session, cfg)); WT_ERR(__wt_config_gets(session, cfg, "hazard_max", &cval)); conn->hazard_max = (uint32_t)cval.val; @@ -2168,7 +2171,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__conn_statistics_config(session, cfg)); WT_ERR(__wt_lsm_manager_config(session, cfg)); WT_ERR(__wt_sweep_config(session, cfg)); - WT_ERR(__wt_verbose_config(session, cfg)); /* Initialize the OS page size for mmap */ conn->page_size = __wt_get_vm_pagesize(); -- cgit v1.2.1 From 9f0eb802f9a1f6a94bec612fc74673a7f2b08278 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 14:03:08 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Handle a not-supported return from mmap preload when the read path is the place where we first attempt to preload. 
--- src/block/block_read.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/block/block_read.c b/src/block/block_read.c index 6126a6860c9..6f0c41c1b5c 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -73,6 +73,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) { WT_BLOCK *block; + WT_DECL_RET; wt_off_t offset; uint32_t cksum, size; bool mapped; @@ -90,9 +91,15 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, if (mapped) { buf->data = (uint8_t *)bm->map + offset; buf->size = size; - if (block->preload_available) - WT_RET(block->fh->fh_map_preload( - session, block->fh, buf->data, buf->size)); + if (block->preload_available) { + ret = block->fh->fh_map_preload( + session, block->fh, buf->data, buf->size); + + /* Ignore ENOTSUP, but don't try again. */ + if (ret != ENOTSUP) + return (ret); + block->preload_available = false; + } WT_STAT_FAST_CONN_INCR(session, block_map_read); WT_STAT_FAST_CONN_INCRV(session, block_byte_map_read, size); -- cgit v1.2.1 From 3dff36f4195f561fb7a07be935e445ec2acc6de6 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 18:24:06 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files In commit 45aa438, I moved the wt utility code that does hot backup copies from the wt utility into the WiredTiger library, and that's failing on Windows. Previously, the wt utility called "open" get a readonly file descriptor on the object to be copied. When I moved that code into the library, I changed it to call __wt_open (and eventually the Windows library call CreateFileA), but still opening readonly. With that change, the txn04 test started failing: test_txn04.test_txn04.test_ops(t1c.remove.row): : .\./WiredTigerLog.0000000002: handle-open: CreateFileA: The process cannot access the file because it is being used by another process. 
The problem is the hot backup open of a readonly handle has a different shared-mode than the WiredTiger existing handle, and that fails. One workaround/fix is to open the files read/write when doing a copy, because then the CreateFileA open has the same shared-mode as the existing handle in WiredTiger. Of course, that breaks if we're ever copying a file that's currently open readonly in WiredTiger. The change I'm doing instead is to not pass in the absolute path of the file to be copied, instead, pass in the name of the file. Then, when we open it to copy it, if the file is currently open, the backup won't open a new file handle, so there won't be any collision. If the file isn't currently open, there obviously won't be any collision. The failure mode here is if WiredTiger attempts to open a file in read/write mode, while the readonly backup copy handle is open, then the WiredTiger thread will see the collision. --- src/utilities/util_backup.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/utilities/util_backup.c b/src/utilities/util_backup.c index 55c0e336111..190c0878f38 100644 --- a/src/utilities/util_backup.c +++ b/src/utilities/util_backup.c @@ -83,7 +83,7 @@ util_backup(WT_SESSION *session, int argc, char *argv[]) while ( (ret = cursor->next(cursor)) == 0 && (ret = cursor->get_key(cursor, &name)) == 0) - if ((ret = copy(session, name, directory)) != 0) + if ((ret = copy(session, directory, name)) != 0) goto err; if (ret == WT_NOTFOUND) ret = 0; @@ -99,25 +99,21 @@ err: free(config); } static int -copy(WT_SESSION *session, const char *name, const char *directory) +copy(WT_SESSION *session, const char *directory, const char *name) { WT_DECL_RET; size_t len; - char *from, *to; + char *to; - from = to = NULL; + to = NULL; - /* Build the 2 pathnames we need. 
*/ - len = strlen(home) + strlen(name) + 2; - if ((from = malloc(len)) == NULL) - goto memerr; - (void)snprintf(from, len, "%s/%s", home, name); + /* Build the target pathname. */ len = strlen(directory) + strlen(name) + 2; if ((to = malloc(len)) == NULL) goto memerr; (void)snprintf(to, len, "%s/%s", directory, name); - if (verbose && printf("Backing up %s to %s\n", from, to) < 0) { + if (verbose && printf("Backing up %s/%s to %s\n", home, name, to) < 0) { fprintf(stderr, "%s: %s\n", progname, strerror(EIO)); goto err; } @@ -126,15 +122,14 @@ copy(WT_SESSION *session, const char *name, const char *directory) * Use WiredTiger to copy the file: ensuring stability of the copied * file on disk requires care, and WiredTiger knows how to do it. */ - if ((ret = __wt_copy_and_sync(session, from, to)) != 0) - fprintf(stderr, "%s to %s: backup copy: %s\n", - from, to, session->strerror(session, ret)); + if ((ret = __wt_copy_and_sync(session, name, to)) != 0) + fprintf(stderr, "%s/%s to %s: backup copy: %s\n", + home, name, to, session->strerror(session, ret)); if (0) { memerr: fprintf(stderr, "%s: %s\n", progname, strerror(errno)); } -err: free(from); - free(to); +err: free(to); return (ret); } -- cgit v1.2.1 From 497d1386b8bb149bd69b5ff1cc30c3eeec7c59ab Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 29 Mar 2016 19:25:06 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Instead of setting share-mode argument for readonly access to CreateFileA, omit GENERIC_WRITE from the desired-access argument, and leave the shared-mode argument as FILE_SHARE_READ | FILE_SHARE_WRITE in all cases. 
--- src/os_win/os_fs.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index f68c3cfdfe7..6ab5b25ee20 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -495,7 +495,7 @@ __win_handle_open(WT_SESSION_IMPL *session, HANDLE filehandle, filehandle_secondary; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - int f, fd, share_mode; + int desired_access, f, fd; bool direct_io; const char *stream_mode; @@ -516,9 +516,9 @@ __win_handle_open(WT_SESSION_IMPL *session, if (file_type == WT_FILE_TYPE_DIRECTORY) goto directory_open; - share_mode = FILE_SHARE_READ; + desired_access = GENERIC_READ; if (!LF_ISSET(WT_OPEN_READONLY)) - share_mode |= FILE_SHARE_WRITE; + desired_access |= GENERIC_WRITE; /* * Security: @@ -563,13 +563,14 @@ __win_handle_open(WT_SESSION_IMPL *session, if (file_type == WT_FILE_TYPE_DATA) f |= FILE_FLAG_RANDOM_ACCESS; - filehandle = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, - share_mode, NULL, dwCreationDisposition, f, NULL); + filehandle = CreateFileA(name, desired_access, + FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, dwCreationDisposition, f, NULL); if (filehandle == INVALID_HANDLE_VALUE) { if (LF_ISSET(WT_OPEN_CREATE) && GetLastError() == ERROR_FILE_EXISTS) - filehandle = CreateFileA( - name, GENERIC_READ | GENERIC_WRITE, share_mode, + filehandle = CreateFileA(name, desired_access, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL); if (filehandle == INVALID_HANDLE_VALUE) WT_ERR_MSG(session, __wt_win32_errno(), @@ -586,9 +587,9 @@ __win_handle_open(WT_SESSION_IMPL *session, * pointer. 
*/ if (!LF_ISSET(WT_OPEN_READONLY)) { - filehandle_secondary = CreateFileA(name, - GENERIC_READ | GENERIC_WRITE, - share_mode, NULL, OPEN_EXISTING, f, NULL); + filehandle_secondary = CreateFileA(name, desired_access, + FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, f, NULL); if (filehandle_secondary == INVALID_HANDLE_VALUE) WT_ERR_MSG(session, __wt_win32_errno(), "%s: handle-open: CreateFileA: secondary", name); -- cgit v1.2.1 From 219e5f179eb1a54a757bc77b2b5be33ef32239a2 Mon Sep 17 00:00:00 2001 From: David Hows Date: Wed, 30 Mar 2016 16:18:52 +1100 Subject: WT-2512 - Change how throttle values per thread are calculated --- bench/wtperf/wtperf_throttle.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bench/wtperf/wtperf_throttle.c b/bench/wtperf/wtperf_throttle.c index bba1f629715..f9fb3a74ea0 100644 --- a/bench/wtperf/wtperf_throttle.c +++ b/bench/wtperf/wtperf_throttle.c @@ -104,10 +104,9 @@ worker_throttle(CONFIG_THREAD *thread) */ WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment)); } else { - throttle_cfg->ops_count = - (uint64_t) (float)(usecs_delta / - throttle_cfg->usecs_increment) * - (float)throttle_cfg->ops_per_increment; + throttle_cfg->ops_count = usecs_delta * + throttle_cfg->ops_per_increment / + throttle_cfg->usecs_increment; throttle_cfg->last_increment = now; } -- cgit v1.2.1 From b4f54c793fafb74819ee09ad25087e6fa914df2d Mon Sep 17 00:00:00 2001 From: David Hows Date: Wed, 30 Mar 2016 17:07:50 +1100 Subject: WT-2512 - Minor changes to make intent clearer. 
--- bench/wtperf/wtperf_throttle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/wtperf/wtperf_throttle.c b/bench/wtperf/wtperf_throttle.c index f9fb3a74ea0..a98fd9b18d7 100644 --- a/bench/wtperf/wtperf_throttle.c +++ b/bench/wtperf/wtperf_throttle.c @@ -104,8 +104,8 @@ worker_throttle(CONFIG_THREAD *thread) */ WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment)); } else { - throttle_cfg->ops_count = usecs_delta * - throttle_cfg->ops_per_increment / + throttle_cfg->ops_count = (usecs_delta * + throttle_cfg->ops_per_increment) / throttle_cfg->usecs_increment; throttle_cfg->last_increment = now; } -- cgit v1.2.1 From 6578d38c0083f02bffdfdba25c105b7261ea9dc1 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 30 Mar 2016 17:43:50 +1100 Subject: WT-2330 Fixup compiler warning. --- src/os_common/os_fhandle.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c index eb7323c3e26..8e225dcaf07 100644 --- a/src/os_common/os_fhandle.c +++ b/src/os_common/os_fhandle.c @@ -87,6 +87,9 @@ __open_verbose(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags) { #ifdef HAVE_VERBOSE + WT_DECL_RET; + WT_DECL_ITEM(tmp); + const char *file_type_tag, *sep; if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS)) return (0); @@ -94,9 +97,6 @@ __open_verbose(WT_SESSION_IMPL *session, * It's useful to track file opens when debugging platforms, take some * effort to output good tracking information. */ - WT_DECL_RET; - WT_DECL_ITEM(tmp); - const char *file_type_tag, *sep; switch (file_type) { case WT_FILE_TYPE_CHECKPOINT: -- cgit v1.2.1 From cbdf24da734d56fbb6f53659011c915aca9c6237 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 30 Mar 2016 17:44:09 +1100 Subject: WT-2330 Ignore EBUSY from truncate during verify. We ignored it in most places before, but not all. 
--- src/block/block_vrfy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c index b31a7b8a1b6..6570184ca10 100644 --- a/src/block/block_vrfy.c +++ b/src/block/block_vrfy.c @@ -156,7 +156,7 @@ __verify_last_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt) ci = &_ci; WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name)); WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci)); - WT_ERR(__wt_block_truncate(session, block, ci->file_size)); + WT_ERR_BUSY_OK(__wt_block_truncate(session, block, ci->file_size)); err: __wt_block_ckpt_destroy(session, ci); return (ret); -- cgit v1.2.1 From e1efe03d550b4aaef4556e54e2a066fac6c18f3b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 30 Mar 2016 09:45:42 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files warning C4098: '__wt_stream_set_line_buffer': 'void' function returning a value --- src/os_win/os_fs.c | 9 +++++++++ src/os_win/os_setvbuf.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 6ab5b25ee20..23ef1127c45 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -395,6 +395,15 @@ __win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { WT_DECL_RET; + /* + * We don't open Windows system handles when opening directories + * for flushing, as it is not necessary (or possible) to flush + * a directory on Windows. Confirm the file handle is set before + * attempting to sync it. + */ + if (fhp->fp == NULL && fh->filehandle == INVALID_HANDLE_VALUE) + return (0); + if (fh->fp == NULL) { /* * Callers attempting asynchronous flush handle ENOTSUP returns, diff --git a/src/os_win/os_setvbuf.c b/src/os_win/os_setvbuf.c index 3eef07915c1..b38ab1ebee2 100644 --- a/src/os_win/os_setvbuf.c +++ b/src/os_win/os_setvbuf.c @@ -24,7 +24,7 @@ __wt_stream_set_line_buffer(FILE *fp) * same as full-buffering. 
We assume our caller wants immediate output, * set no-buffering instead. */ - return (__wt_stream_set_no_buffer(fp)); + __wt_stream_set_no_buffer(fp); } /* -- cgit v1.2.1 From f674c0348bcfc8a08f65ae27d64b9abf05356625 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 30 Mar 2016 09:47:02 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Update __win_handle_sync() to handle WT_FH structures without open handles, allows removal of the __linux__ #ifdef from __wt_directory_sync_fh subt. --- src/include/misc.i | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/include/misc.i b/src/include/misc.i index 2926ff07e58..a96ce405c89 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -171,12 +171,7 @@ __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) { WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); -#ifdef __linux__ return (fh->fh_sync(session, fh, true)); -#else - WT_UNUSED(fh); - return (0); -#endif } /* -- cgit v1.2.1 From 05eae6facb37714829372c84b84e910dca844f1b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 30 Mar 2016 09:50:28 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Fix a typo. --- src/os_win/os_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 23ef1127c45..be383f229f3 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -401,7 +401,7 @@ __win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) * a directory on Windows. Confirm the file handle is set before * attempting to sync it. 
*/ - if (fhp->fp == NULL && fh->filehandle == INVALID_HANDLE_VALUE) + if (fh->fp == NULL && fh->filehandle == INVALID_HANDLE_VALUE) return (0); if (fh->fp == NULL) { -- cgit v1.2.1 From d01a2c334c679045b64332f6059568d75c1d499e Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 30 Mar 2016 10:08:40 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Whitespace. --- src/os_common/os_fhandle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c index 8e225dcaf07..749617b928a 100644 --- a/src/os_common/os_fhandle.c +++ b/src/os_common/os_fhandle.c @@ -90,6 +90,7 @@ __open_verbose(WT_SESSION_IMPL *session, WT_DECL_RET; WT_DECL_ITEM(tmp); const char *file_type_tag, *sep; + if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS)) return (0); -- cgit v1.2.1 From 1b44665c66c71f4cd64f360f5620ef6b20b3efe0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 30 Mar 2016 10:43:10 -0400 Subject: WT-2330: in-memory configurations should not create on-disk collection files Rename __wt_win32_errno() to be __wt_getlasterror() to match Windows naming convention. Rework dynamic library load/unload routines to fix final return value, it shouldn't be just a negation of the Windows failure. 
--- dist/s_string.ok | 1 + src/include/extern.h | 2 +- src/os_win/os_dir.c | 2 +- src/os_win/os_dlopen.c | 24 ++++++++++-------------- src/os_win/os_errno.c | 4 ++-- src/os_win/os_fs.c | 32 ++++++++++++++++---------------- src/os_win/os_getenv.c | 2 +- src/os_win/os_map.c | 8 ++++---- src/os_win/os_mtx_cond.c | 2 +- src/os_win/os_thread.c | 4 ++-- 10 files changed, 39 insertions(+), 42 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index 882a1f78d7b..06ffddf147a 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -651,6 +651,7 @@ gdb ge getc getenv +getlasterror getline getone getones diff --git a/src/include/extern.h b/src/include/extern.h index f2b620570f1..89e293bcff6 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -733,6 +733,7 @@ extern int __wt_errno(void); extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); extern int __wt_get_vm_pagesize(void); extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); +extern int __wt_getlasterror(void); extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); @@ -764,7 +765,6 @@ extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, v extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); -extern int __wt_win32_errno(void); extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); extern int __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t 
*lenp); extern int __wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c index 95a7f66827b..64eae60983c 100644 --- a/src/os_win/os_dir.c +++ b/src/os_win/os_dir.c @@ -43,7 +43,7 @@ __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, findhandle = FindFirstFileA(pathbuf->data, &finddata); if (findhandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_win32_errno(), + WT_ERR_MSG(session, __wt_getlasterror(), "%s: directory-list: FindFirstFile", pathbuf->data); count = 0; diff --git a/src/os_win/os_dlopen.c b/src/os_win/os_dlopen.c index 0c0d70ec1c9..ce949e4ea5f 100644 --- a/src/os_win/os_dlopen.c +++ b/src/os_win/os_dlopen.c @@ -23,18 +23,17 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) /* NULL means load from the current binary */ if (path == NULL) { - ret = GetModuleHandleExA(0, NULL, (HMODULE *)&dlh->handle); - if (ret == FALSE) - WT_ERR_MSG(session, __wt_win32_errno(), + if (GetModuleHandleExA( + 0, NULL, (HMODULE *)&dlh->handle) == FALSE) { + ret = __wt_getlasterror(); + WT_ERR_MSG(session, ret, "GetModuleHandleEx(%s): %s", path, 0); + } } else { // TODO: load dll here DebugBreak(); } - /* Windows returns 0 on failure, WT expects 0 on success */ - ret = !ret; - *dlhp = dlh; if (0) { err: __wt_free(session, dlh->name); @@ -56,10 +55,9 @@ __wt_dlsym(WT_SESSION_IMPL *session, *(void **)sym_ret = NULL; sym = GetProcAddress(dlh->handle, name); - if (sym == NULL && fail) { - WT_RET_MSG(session, __wt_win32_errno(), + if (sym == NULL && fail) + WT_RET_MSG(session, __wt_getlasterror(), "GetProcAddress(%s in %s)", name, dlh->name); - } *(void **)sym_ret = sym; return (0); @@ -74,13 +72,11 @@ __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) { WT_DECL_RET; - if ((ret = FreeLibrary(dlh->handle)) == FALSE) { - __wt_err(session, __wt_win32_errno(), "FreeLibrary"); + if (FreeLibrary(dlh->handle) == FALSE) { + ret = __wt_getlasterror(); + 
__wt_err(session, ret, "FreeLibrary: %s", dlh->name); } - /* Windows returns 0 on failure, WT expects 0 on success */ - ret = !ret; - __wt_free(session, dlh->name); __wt_free(session, dlh); return (ret); diff --git a/src/os_win/os_errno.c b/src/os_win/os_errno.c index 269be2abee3..f3fffd5ef42 100644 --- a/src/os_win/os_errno.c +++ b/src/os_win/os_errno.c @@ -72,11 +72,11 @@ __wt_errno(void) } /* - * __wt_win32_errno -- + * __wt_getlasterror -- * Return GetLastError, or WT_ERROR if error not set. */ int -__wt_win32_errno(void) +__wt_getlasterror(void) { /* * Called when we know an error occurred, and we want the system diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index be383f229f3..6092f698f7a 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -64,7 +64,7 @@ __win_file_remove(WT_SESSION_IMPL *session, const char *name) name = path; if (DeleteFileA(name) == FALSE) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); __wt_err(session, ret, "%s: file-remove: DeleteFileA", name); } @@ -103,13 +103,13 @@ __win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) */ if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) if (DeleteFileA(to) == FALSE) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); __wt_err(session, ret, "%s to %s: file-rename: rename", from, to); } if (ret == 0 && MoveFileA(from, to) == FALSE) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); __wt_err(session, ret, "%s to %s: file-rename: rename", from, to); } @@ -147,7 +147,7 @@ __win_file_size( * Some callers of this function expect failure if the file doesn't * exist, and don't want an error message logged. 
*/ - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); if (!silent) WT_RET_MSG(session, ret, "%s: file-size: GetFileAttributesEx", name); @@ -226,7 +226,7 @@ __win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) */ if (fh->filehandle != INVALID_HANDLE_VALUE && CloseHandle(fh->filehandle) == 0) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); __wt_err(session, ret, "%s: handle-close: CloseHandle", fh->name); } @@ -249,7 +249,7 @@ __win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) /* Close the secondary handle. */ if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && CloseHandle(fh->filehandle_secondary) == 0) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); __wt_err(session, ret, "%s: handle-close: secondary: CloseHandle", fh->name); } @@ -299,13 +299,13 @@ __win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) */ if (lock) { if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); __wt_err(session, ret, "%s: handle-lock: LockFile", fh->name); } } else if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); __wt_err(session, ret, "%s: handle-lock: UnlockFile", fh->name); } @@ -360,7 +360,7 @@ __win_handle_read( if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) WT_RET_MSG(session, - nr == 0 ? WT_ERROR : __wt_win32_errno(), + nr == 0 ? 
WT_ERROR : __wt_getlasterror(), "%s: handle-read: ReadFile: failed to read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, fh->name, chunk, (uintmax_t)offset); @@ -383,7 +383,7 @@ __win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) } WT_RET_MSG(session, - __wt_win32_errno(), "%s: handle-size: GetFileSizeEx", fh->name); + __wt_getlasterror(), "%s: handle-size: GetFileSizeEx", fh->name); } /* @@ -413,7 +413,7 @@ __win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) return (ENOTSUP); if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) - WT_RET_MSG(session, __wt_win32_errno(), + WT_RET_MSG(session, __wt_getlasterror(), "%s handle-sync: FlushFileBuffers error", fh->name); return (0); } @@ -441,13 +441,13 @@ __win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) if (SetFilePointerEx( fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) - WT_RET_MSG(session, __wt_win32_errno(), + WT_RET_MSG(session, __wt_getlasterror(), "%s: handle-truncate: SetFilePointerEx", fh->name); if (SetEndOfFile(fh->filehandle_secondary) == FALSE) { if (GetLastError() == ERROR_USER_MAPPED_FILE) return (EBUSY); - WT_RET_MSG(session, __wt_win32_errno(), + WT_RET_MSG(session, __wt_getlasterror(), "%s: handle-truncate: SetEndOfFile error", fh->name); } return (0); @@ -484,7 +484,7 @@ __win_handle_write(WT_SESSION_IMPL *session, overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) - WT_RET_MSG(session, __wt_win32_errno(), + WT_RET_MSG(session, __wt_getlasterror(), "%s: handle-write: WriteFile: failed to write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, fh->name, chunk, (uintmax_t)offset); @@ -582,7 +582,7 @@ __win_handle_open(WT_SESSION_IMPL *session, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL); if (filehandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_win32_errno(), + WT_ERR_MSG(session, __wt_getlasterror(), direct_io ? 
"%s: handle-open: CreateFileA: failed with direct " "I/O configured, some filesystem types do not " @@ -600,7 +600,7 @@ __win_handle_open(WT_SESSION_IMPL *session, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL); if (filehandle_secondary == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_win32_errno(), + WT_ERR_MSG(session, __wt_getlasterror(), "%s: handle-open: CreateFileA: secondary", name); } diff --git a/src/os_win/os_getenv.c b/src/os_win/os_getenv.c index cb095381942..9b297ac3a74 100644 --- a/src/os_win/os_getenv.c +++ b/src/os_win/os_getenv.c @@ -29,7 +29,7 @@ __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) ret = GetEnvironmentVariableA(variable, *envp, size); /* We expect the number of bytes not including nul terminator. */ if ((ret + 1) != size) - WT_RET_MSG(session, __wt_win32_errno(), + WT_RET_MSG(session, __wt_getlasterror(), "GetEnvironmentVariableA failed: %s", variable); return (0); diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index a46c78bedb9..c75a3f1cf5f 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -34,12 +34,12 @@ __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) fh->maphandle = CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); if (fh->maphandle == NULL) - WT_RET_MSG(session, __wt_win32_errno(), + WT_RET_MSG(session, __wt_getlasterror(), "%s: memory-map: CreateFileMappingA", fh->name); if ((map = MapViewOfFile(fh->maphandle, FILE_MAP_READ, 0, 0, len)) == NULL) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); (void)CloseHandle(fh->maphandle); fh->maphandle = NULL; @@ -100,13 +100,13 @@ __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) return (0); if (UnmapViewOfFile(map) == 0) { - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); __wt_err(session, ret, "%s: memory-unmap: UnmapViewOfFile", fh->name); } if (CloseHandle(fh->maphandle) == 0) { - ret = __wt_win32_errno(); + ret = 
__wt_getlasterror(); __wt_err(session, ret, "%s: memory-unmap: CloseHandle", fh->name); } diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c index 28fd24e68b0..af4a5035076 100644 --- a/src/os_win/os_mtx_cond.c +++ b/src/os_win/os_mtx_cond.c @@ -103,7 +103,7 @@ __wt_cond_wait_signal( if ((err = GetLastError()) == ERROR_TIMEOUT) *signalled = false; else - ret = __wt_win32_errno(); + ret = __wt_getlasterror(); } else ret = 0; diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c index 1a223a19cf8..94c5a8b0ab2 100644 --- a/src/os_win/os_thread.c +++ b/src/os_win/os_thread.c @@ -38,12 +38,12 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * If we fail to wait, we will leak handles so do not continue */ WT_PANIC_RET(session, - ret == WAIT_FAILED ? __wt_win32_errno() : ret, + ret == WAIT_FAILED ? __wt_getlasterror() : ret, "thread join: WaitForSingleObject"); if (CloseHandle(tid) == 0) { WT_RET_MSG(session, - __wt_win32_errno(), "thread join: CloseHandle"); + __wt_getlasterror(), "thread join: CloseHandle"); } return (0); -- cgit v1.2.1 From 689c0b58ad1d8a2205967bd193a81e03516f46e4 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 31 Mar 2016 14:45:56 +1100 Subject: WT-2330 Move the map handle back into the block structure. Multiple block structures can share the same file handle. So saving the map handle into the WT_FH structure means we overwrite it each time we memory map a region from a different btree. 
--- dist/s_string.ok | 1 + src/block/block_map.c | 13 +++++++++---- src/block/block_mgr.c | 8 ++++---- src/include/block.h | 1 + src/include/extern.h | 12 ++++++------ src/include/os.h | 6 +++--- src/os_common/os_fs_stdio.c | 9 ++++++--- src/os_posix/os_map.c | 10 ++++++++-- src/os_win/os_map.c | 25 +++++++++++++------------ 9 files changed, 51 insertions(+), 34 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index 06ffddf147a..eed034abb47 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -891,6 +891,7 @@ pre pread prealloc preload +preloaded prepend prepended prepending diff --git a/src/block/block_map.c b/src/block/block_map.c index 8ae6e815798..b16fe7f8423 100644 --- a/src/block/block_map.c +++ b/src/block/block_map.c @@ -14,7 +14,8 @@ */ int __wt_block_map( - WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp) + WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, + void **mappingcookie) { WT_DECL_RET; @@ -28,6 +29,7 @@ __wt_block_map( */ WT_UNUSED(session); WT_UNUSED(block); + WT_UNUSED(mappingcookie); #else /* Map support is configurable. */ if (!S2C(session)->mmap) @@ -53,7 +55,8 @@ __wt_block_map( * Ignore not-supported errors, we'll read the file through the cache * if map fails. */ - ret = block->fh->fh_map(session, block->fh, mapp, maplenp); + ret = block->fh->fh_map( + session, block->fh, mapp, maplenp, mappingcookie); if (ret == ENOTSUP) ret = 0; #endif @@ -67,8 +70,10 @@ __wt_block_map( */ int __wt_block_unmap( - WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen) + WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen, + void **mappingcookie) { /* Unmap the file from memory. 
*/ - return (block->fh->fh_map_unmap(session, block->fh, map, maplen)); + return (block->fh->fh_map_unmap( + session, block->fh, map, maplen, mappingcookie)); } diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index f20bb991ff7..06150a0f062 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -102,8 +102,8 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session, * Read-only objects are optionally mapped into memory instead * of being read into cache buffers. */ - WT_RET(__wt_block_map( - session, bm->block, &bm->map, &bm->maplen)); + WT_RET(__wt_block_map(session, + bm->block, &bm->map, &bm->maplen, &bm->mappingcookie)); /* * If this handle is for a checkpoint, that is, read-only, there @@ -148,8 +148,8 @@ __bm_checkpoint_unload(WT_BM *bm, WT_SESSION_IMPL *session) /* Unmap any mapped segment. */ if (bm->map != NULL) - WT_TRET(__wt_block_unmap( - session, bm->block, bm->map, bm->maplen)); + WT_TRET(__wt_block_unmap(session, + bm->block, bm->map, bm->maplen, &bm->mappingcookie)); /* Unload the checkpoint. 
*/ WT_TRET(__wt_block_checkpoint_unload(session, bm->block, !bm->is_live)); diff --git a/src/include/block.h b/src/include/block.h index d1ea04572e7..e964fb4e8c2 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -198,6 +198,7 @@ struct __wt_bm { void *map; /* Mapped region */ size_t maplen; + void *mappingcookie; /* * There's only a single block manager handle that can be written, all diff --git a/src/include/extern.h b/src/include/extern.h index 89e293bcff6..d4e67b2f313 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -41,8 +41,8 @@ extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, W extern int __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el); extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size); extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); -extern int __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp); -extern int __wt_block_unmap( WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen); +extern int __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, void **mappingcookie); +extern int __wt_block_unmap( WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen, void **mappingcookie); extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp); extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename); extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize); @@ -752,10 +752,10 @@ extern int __wt_os_win(WT_SESSION_IMPL *session); extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session); extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, 
uint32_t flags, char ***dirlist, u_int *countp); extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); -extern int __wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp); +extern int __wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); extern int __wt_posix_map_discard( WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); extern int __wt_posix_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); -extern int __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len); +extern int __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); @@ -766,10 +766,10 @@ extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, c extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); -extern int __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp); +extern int __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); extern int __wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); extern int __wt_win_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); -extern int 
__wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len); +extern int __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); diff --git a/src/include/os.h b/src/include/os.h index 92d469ff2a7..5034b17511d 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -82,7 +82,6 @@ struct __wt_fh { HANDLE filehandle; /* Windows file handle */ HANDLE filehandle_secondary; /* Windows file handle for file size changes */ - HANDLE maphandle; /* Windows map object */ #else int fd; /* POSIX file handle */ #endif @@ -113,10 +112,11 @@ struct __wt_fh { int (*fh_close)(WT_SESSION_IMPL *, WT_FH *); int (*fh_getc)(WT_SESSION_IMPL *, WT_FH *, int *); int (*fh_lock)(WT_SESSION_IMPL *, WT_FH *, bool); - int (*fh_map)(WT_SESSION_IMPL *, WT_FH *, void *, size_t *); + int (*fh_map)(WT_SESSION_IMPL *, WT_FH *, void *, size_t *, void **); int (*fh_map_discard)(WT_SESSION_IMPL *, WT_FH *, void *, size_t); int (*fh_map_preload)(WT_SESSION_IMPL *, WT_FH *, const void *, size_t); - int (*fh_map_unmap)(WT_SESSION_IMPL *, WT_FH *, void *, size_t); + int (*fh_map_unmap)( + WT_SESSION_IMPL *, WT_FH *, void *, size_t, void **); int (*fh_printf)(WT_SESSION_IMPL *, WT_FH *, const char *, va_list); int (*fh_read)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *); int (*fh_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); diff --git a/src/os_common/os_fs_stdio.c b/src/os_common/os_fs_stdio.c index 5e3cd522bd6..9baba9b6945 100644 --- a/src/os_common/os_fs_stdio.c +++ b/src/os_common/os_fs_stdio.c @@ -72,10 +72,12 @@ __stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) * Map a file. 
*/ static int -__stdio_handle_map(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t *lenp) +__stdio_handle_map(WT_SESSION_IMPL *session, + WT_FH *fh, void *p, size_t *lenp, void **mappingcookie) { WT_UNUSED(p); WT_UNUSED(lenp); + WT_UNUSED(mappingcookie); WT_RET_MSG(session, ENOTSUP, "%s: handle-map", fh->name); } @@ -110,11 +112,12 @@ __stdio_handle_map_preload( * Unmap a file. */ static int -__stdio_handle_map_unmap( - WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len) +__stdio_handle_map_unmap(WT_SESSION_IMPL *session, + WT_FH *fh, void *p, size_t len, void **mappingcookie) { WT_UNUSED(p); WT_UNUSED(len); + WT_UNUSED(mappingcookie); WT_RET_MSG(session, ENOTSUP, "%s: handle-map-unmap", fh->name); } diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 304bb32df31..de28891ffd1 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -13,12 +13,15 @@ * Map a file into memory. */ int -__wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) +__wt_posix_map(WT_SESSION_IMPL *session, + WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) { size_t len; wt_off_t file_size; void *map; + WT_UNUSED(mappingcookie); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); /* @@ -173,8 +176,11 @@ __wt_posix_map_discard( * Remove a memory mapping. */ int -__wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) +__wt_posix_map_unmap(WT_SESSION_IMPL *session, + WT_FH *fh, void *map, size_t len, void **mappingcookie) { + WT_UNUSED(mappingcookie); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); (void)__wt_verbose(session, WT_VERB_HANDLEOPS, diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index c75a3f1cf5f..b043f9c9923 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -13,7 +13,8 @@ * Map a file into memory. 
*/ int -__wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) +__wt_win_map(WT_SESSION_IMPL *session, + WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) { WT_DECL_RET; size_t len; @@ -31,18 +32,18 @@ __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp) (void)__wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len); - fh->maphandle = + *mappingcookie = CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); - if (fh->maphandle == NULL) + if (*mappingcookie == NULL) WT_RET_MSG(session, __wt_getlasterror(), "%s: memory-map: CreateFileMappingA", fh->name); if ((map = - MapViewOfFile(fh->maphandle, FILE_MAP_READ, 0, 0, len)) == NULL) { + MapViewOfFile(*mappingcookie, FILE_MAP_READ, 0, 0, len)) == NULL) { + /* Retrieve the error before cleaning up. */ ret = __wt_getlasterror(); - - (void)CloseHandle(fh->maphandle); - fh->maphandle = NULL; + CloseHandle(*mappingcookie); + *mappingcookie = NULL; WT_RET_MSG(session, ret, "%s: memory-map: MapViewOfFile", fh->name); @@ -89,15 +90,15 @@ __wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) * Remove a memory mapping. 
*/ int -__wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) +__wt_win_map_unmap(WT_SESSION_IMPL *session, + WT_FH *fh, void *map, size_t len, void **mappingcookie) { WT_DECL_RET; (void)__wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); - if (fh->maphandle == NULL) - return (0); + WT_ASSERT(session, *mappingcookie != NULL); if (UnmapViewOfFile(map) == 0) { ret = __wt_getlasterror(); @@ -105,13 +106,13 @@ __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len) "%s: memory-unmap: UnmapViewOfFile", fh->name); } - if (CloseHandle(fh->maphandle) == 0) { + if (CloseHandle(*mappingcookie) == 0) { ret = __wt_getlasterror(); __wt_err(session, ret, "%s: memory-unmap: CloseHandle", fh->name); } - fh->maphandle = NULL; + *mappingcookie = NULL; return (ret); } -- cgit v1.2.1 From fcce4239d9171fe0b3c58e9d04e7373741a6d26c Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 31 Mar 2016 15:06:14 +1100 Subject: WT-2330 Free memory in __wt_directory_sync --- src/os_posix/os_fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c index 2d0bee7ae05..7f38464e175 100644 --- a/src/os_posix/os_fs.c +++ b/src/os_posix/os_fs.c @@ -106,13 +106,14 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) WT_SYSCALL_RETRY(( (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); if (ret != 0) - WT_RET_MSG(session, ret, "%s: directory-sync: open", path); + WT_ERR_MSG(session, ret, "%s: directory-sync: open", path); ret = __posix_sync(session, fd, path, "directory-sync", true); WT_SYSCALL_RETRY(close(fd), tret); if (tret != 0) __wt_err(session, tret, "%s: directory-sync: close", path); +err: __wt_free(session, copy); return (ret == 0 ? 
tret : ret); #else WT_UNUSED(session); -- cgit v1.2.1 From 0d61a3bf1b0a9521b5c8cda22e78f6875725a5c3 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 31 Mar 2016 15:12:21 +1100 Subject: WT-2330 Fix compiler warning: ../src/os_posix/os_fs.c: In function '__posix_directory_sync': ../src/os_posix/os_fs.c:83:10: error: 'tret' may be used uninitialized in this function [-Werror=maybe-uninitialized] int fd, tret; --- src/os_posix/os_fs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c index 7f38464e175..68d70594582 100644 --- a/src/os_posix/os_fs.c +++ b/src/os_posix/os_fs.c @@ -84,6 +84,7 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) const char *dir; char *copy; + tret = 0; /* * POSIX 1003.1 does not require that fsync of a file handle ensures the * entry in the directory containing the file has also reached disk (and @@ -111,10 +112,13 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) ret = __posix_sync(session, fd, path, "directory-sync", true); WT_SYSCALL_RETRY(close(fd), tret); - if (tret != 0) + if (tret != 0) { __wt_err(session, tret, "%s: directory-sync: close", path); + if (ret == 0) + ret = tret; + } err: __wt_free(session, copy); - return (ret == 0 ? tret : ret); + return (ret); #else WT_UNUSED(session); WT_UNUSED(path); -- cgit v1.2.1 From 876d4a1be3469b0dfca383053f73c2c8b290d091 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 31 Mar 2016 11:14:37 -0400 Subject: WT-2522: Incorrect format code in message Use "%lu" to print the DWORD, it's a 32-bit "unsigned long". --- src/os_win/os_fs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 6092f698f7a..bf8232419e9 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -361,8 +361,8 @@ __win_handle_read( if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) WT_RET_MSG(session, nr == 0 ? 
WT_ERROR : __wt_getlasterror(), - "%s: handle-read: ReadFile: failed to read %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, + "%s: handle-read: ReadFile: failed to read %lu " + "bytes at offset %" PRIuMAX, fh->name, chunk, (uintmax_t)offset); } return (0); @@ -485,8 +485,8 @@ __win_handle_write(WT_SESSION_IMPL *session, if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) WT_RET_MSG(session, __wt_getlasterror(), - "%s: handle-write: WriteFile: failed to write %" - WT_SIZET_FMT " bytes at offset %" PRIuMAX, + "%s: handle-write: WriteFile: failed to write %lu " + "bytes at offset %" PRIuMAX, fh->name, chunk, (uintmax_t)offset); } return (0); -- cgit v1.2.1 From 5661fa986dd56d5df18ea069b7779578cb0809ad Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 31 Mar 2016 12:16:47 -0400 Subject: WT-2513 Fix conversion warning. --- src/log/log_slot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log/log_slot.c b/src/log/log_slot.c index 570d1c9ce48..bcca212603a 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -96,7 +96,7 @@ retry: slot->slot_end_lsn = slot->slot_start_lsn; end_offset = WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered; - slot->slot_end_lsn.l.offset += end_offset; + slot->slot_end_lsn.l.offset += (uint32_t)end_offset; WT_STAT_FAST_CONN_INCRV(session, log_slot_consolidated, end_offset); /* -- cgit v1.2.1 From 1a32953e027b417c73a9aca3b3eca149f1fa10d8 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 1 Apr 2016 10:12:28 +1100 Subject: Update Evergreen build file to support correct OS X target. 
--- test/mciproject.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/mciproject.yml b/test/mciproject.yml index 49caa44438d..9abdf23ec3b 100644 --- a/test/mciproject.yml +++ b/test/mciproject.yml @@ -124,10 +124,10 @@ buildvariants: - name: compile-windows-alt - name: fops-windows -- name: osx-108 - display_name: OS X 10.8 +- name: osx-1010 + display_name: OS X 10.10 run_on: - - osx-108 + - osx-1010 expansions: smp_command: -j$(sysctl -n hw.logicalcpu) configure_env_vars: PATH=/opt/local/bin:$PATH -- cgit v1.2.1 From 874e90b864d6ce5215aa7c77b3b12d424cbbf35c Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 1 Apr 2016 16:53:32 +1100 Subject: WT-2330 Fix GCC 4.7 compiler warnings. --- src/os_common/filename.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/os_common/filename.c b/src/os_common/filename.c index 83a1a985378..c1109bfc75e 100644 --- a/src/os_common/filename.c +++ b/src/os_common/filename.c @@ -137,8 +137,7 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_DECL_RET; WT_FH *ffh, *tfh; WT_SESSION_IMPL *session; - size_t n; - wt_off_t offset, size; + wt_off_t n, offset, size; char *buf; session = (WT_SESSION_IMPL *)wt_session; @@ -176,9 +175,11 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) /* Get the file's size, then copy the bytes. */ WT_ERR(__wt_filesize(session, ffh, &size)); for (offset = 0; size > 0; size -= n, offset += n) { - n = (size_t)WT_MIN(size, WT_BACKUP_COPY_SIZE); - WT_ERR(__wt_read(session, ffh, offset, n, buf)); - WT_ERR(__wt_write(session, tfh, offset, n, buf)); + n = WT_MIN(size, WT_BACKUP_COPY_SIZE); + WT_ERR(__wt_read( + session, ffh, offset, (size_t)n, buf)); + WT_ERR(__wt_write( + session, tfh, offset, (size_t)n, buf)); } /* Close the from handle, then swap the temporary file into place. 
*/ -- cgit v1.2.1 From 9b3d1f470e9623364d3172a70c542fb9f3b60d11 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 1 Apr 2016 08:07:34 -0400 Subject: WT-2525: in-memory configurations: miscellaneous cleanups KNF --- src/os_common/filename.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/os_common/filename.c b/src/os_common/filename.c index c1109bfc75e..dfd67284948 100644 --- a/src/os_common/filename.c +++ b/src/os_common/filename.c @@ -176,10 +176,8 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_ERR(__wt_filesize(session, ffh, &size)); for (offset = 0; size > 0; size -= n, offset += n) { n = WT_MIN(size, WT_BACKUP_COPY_SIZE); - WT_ERR(__wt_read( - session, ffh, offset, (size_t)n, buf)); - WT_ERR(__wt_write( - session, tfh, offset, (size_t)n, buf)); + WT_ERR(__wt_read(session, ffh, offset, (size_t)n, buf)); + WT_ERR(__wt_write(session, tfh, offset, (size_t)n, buf)); } /* Close the from handle, then swap the temporary file into place. */ -- cgit v1.2.1 From 39edc9f66220bab5a8c5e8f0ae6f26979605cd39 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 1 Apr 2016 08:18:11 -0400 Subject: WT-2525: in-memory configurations: miscellaneous cleanups Coverity 1353723: don't provide a special-case locking path for in-memory configuration file size queries. It's complicated and invites future bugs, and there's no performance reason we need the magic. 
--- src/include/extern.h | 3 +-- src/os_common/os_fhandle.c | 25 +++++-------------------- src/os_common/os_fs_inmemory.c | 8 ++++---- src/os_posix/os_fs.c | 6 +++--- src/os_win/os_fs.c | 6 +++--- 5 files changed, 16 insertions(+), 32 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index d4e67b2f313..292bcfb1c7c 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -713,7 +713,7 @@ extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char * extern int __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_recover(WT_SESSION_IMPL *session); extern bool __wt_absolute_path(const char *path); -extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp); +extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp); extern bool __wt_has_priv(void); extern const char *__wt_path_separator(void); extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); @@ -773,7 +773,6 @@ extern int __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, si extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); -extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session); extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); extern void __wt_stream_set_line_buffer(FILE *fp); diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c index 749617b928a..b16b2e24bfa 100644 --- a/src/os_common/os_fhandle.c +++ b/src/os_common/os_fhandle.c @@ -13,8 +13,8 @@ * Search for a matching handle. 
*/ bool -__wt_handle_search(WT_SESSION_IMPL *session, const char *name, - bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp) +__wt_handle_search(WT_SESSION_IMPL *session, + const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_FH *fh; @@ -58,26 +58,11 @@ __wt_handle_search(WT_SESSION_IMPL *session, const char *name, *fhp = newfh; } - /* - * Our caller may be operating on the handle itself, optionally leave - * the list locked. - */ - if (unlock) - __wt_spin_unlock(session, &conn->fh_lock); + __wt_spin_unlock(session, &conn->fh_lock); return (found); } -/* - * __wt_handle_search_unlock -- - * Release handle lock. - */ -void -__wt_handle_search_unlock(WT_SESSION_IMPL *session) -{ - __wt_spin_unlock(session, &S2C(session)->fh_lock); -} - /* * __open_verbose -- * Optionally output a verbose message on handle open. @@ -179,7 +164,7 @@ __wt_open(WT_SESSION_IMPL *session, WT_RET(__open_verbose(session, name, file_type, flags)); /* Check if the handle is already open. */ - if (__wt_handle_search(session, name, true, true, NULL, &fh)) { + if (__wt_handle_search(session, name, true, NULL, &fh)) { /* * XXX * The in-memory implementation has to reset the file offset @@ -223,7 +208,7 @@ __wt_open(WT_SESSION_IMPL *session, * Repeat the check for a match: if there's no match, link our newly * created handle onto the database's list of files. 
*/ - if (__wt_handle_search(session, name, true, true, fh, fhp)) { + if (__wt_handle_search(session, name, true, fh, fhp)) { err: if (open_called) WT_TRET(fh->fh_close(session, fh)); if (fh != NULL) { diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index e79054e56ed..0d25fcaf7c3 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -52,7 +52,7 @@ __im_directory_sync(WT_SESSION_IMPL *session, const char *path) static int __im_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) { - *existp = __wt_handle_search(session, name, false, true, NULL, NULL); + *existp = __wt_handle_search(session, name, false, NULL, NULL); return (0); } @@ -66,7 +66,7 @@ __im_file_remove(WT_SESSION_IMPL *session, const char *name) WT_DECL_RET; WT_FH *fh; - if (__wt_handle_search(session, name, true, true, NULL, &fh)) { + if (__wt_handle_search(session, name, true, NULL, &fh)) { WT_ASSERT(session, fh->ref == 1); /* Force a discard of the handle. 
*/ @@ -150,9 +150,9 @@ __im_file_size( im = S2C(session)->inmemory; __wt_spin_lock(session, &im->lock); - if (__wt_handle_search(session, name, false, false, NULL, &fh)) { + if (__wt_handle_search(session, name, true, NULL, &fh)) { *sizep = (wt_off_t)fh->buf.size; - __wt_handle_search_unlock(session); + ret = __wt_close(session, &fh); } else ret = ENOENT; diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c index 68d70594582..7d3049f6c2d 100644 --- a/src/os_posix/os_fs.c +++ b/src/os_posix/os_fs.c @@ -164,7 +164,7 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name) char *path; #ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, name, false, true, NULL, NULL)) + if (__wt_handle_search(session, name, false, NULL, NULL)) WT_RET_MSG(session, EINVAL, "%s: file-remove: file has open handles", name); #endif @@ -191,10 +191,10 @@ __posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) char *from_path, *to_path; #ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, from, false, true, NULL, NULL)) + if (__wt_handle_search(session, from, false, NULL, NULL)) WT_RET_MSG(session, EINVAL, "%s: file-rename: file has open handles", from); - if (__wt_handle_search(session, to, false, true, NULL, NULL)) + if (__wt_handle_search(session, to, false, NULL, NULL)) WT_RET_MSG(session, EINVAL, "%s: file-rename: file has open handles", to); #endif diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index bf8232419e9..7f2c797dbe4 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -55,7 +55,7 @@ __win_file_remove(WT_SESSION_IMPL *session, const char *name) char *path; #ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, name, false, true, NULL, NULL)) + if (__wt_handle_search(session, name, false, NULL, NULL)) WT_RET_MSG(session, EINVAL, "%s: file-remove: file has open handles", name); #endif @@ -83,10 +83,10 @@ __win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) char *from_path, *to_path; #ifdef 
HAVE_DIAGNOSTIC - if (__wt_handle_search(session, from, false, true, NULL, NULL)) + if (__wt_handle_search(session, from, false, NULL, NULL)) WT_RET_MSG(session, EINVAL, "%s: file-rename: file has open handles", from); - if (__wt_handle_search(session, to, false, true, NULL, NULL)) + if (__wt_handle_search(session, to, false, NULL, NULL)) WT_RET_MSG(session, EINVAL, "%s: file-rename: file has open handles", to); #endif -- cgit v1.2.1 From d2f5a34263ee812615d76172abbcc4d60f230ff4 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 1 Apr 2016 08:39:33 -0400 Subject: WT-2525: in-memory configurations: miscellaneous cleanups Coverity 1353725, 1353726: __wt_metadata_search() and __wt_turtle_read() could return errors after allocating memory, which was then leaked by their callers. --- src/meta/meta_table.c | 4 ++++ src/meta/meta_turtle.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index e5f2727b5b6..dd65f1a7ef9 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -290,6 +290,10 @@ __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_ERR(cursor->get_value(cursor, &value)); WT_ERR(__wt_strdup(session, value, valuep)); + err: WT_TRET(__wt_metadata_cursor_release(session, &cursor)); + + if (ret != 0) + __wt_free(session, *valuep); return (ret); } diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index d958e733cbe..0b287c228e5 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -282,6 +282,9 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) err: WT_TRET(__wt_close(session, &fh)); __wt_scr_free(session, &buf); + + if (ret != 0) + __wt_free(session, *valuep); return (ret); } -- cgit v1.2.1 From ca04aafcd12b00e8821edea09b635de2c1efadb9 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 1 Apr 2016 08:44:25 -0400 Subject: WT-2525: in-memory configurations: miscellaneous cleanups Coverity 1353727: va_copy() missing the 
va_end() call. --- src/os_common/os_fs_inmemory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index 0d25fcaf7c3..5b9c849d225 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -243,6 +243,7 @@ __im_handle_printf( for (;;) { va_copy(ap_copy, ap); len = (size_t)vsnprintf(tmp->mem, tmp->memsize, fmt, ap_copy); + va_end(ap_copy); if (len < tmp->memsize) { tmp->data = tmp->mem; tmp->size = len; -- cgit v1.2.1 From 303299ffb10567ed4f8d27c971cbdf3fe9d9f84a Mon Sep 17 00:00:00 2001 From: Sasha Fedorova Date: Fri, 1 Apr 2016 12:57:50 -0700 Subject: Added a check for HAVE_POSIX_FADVISE to fix a compilation error on OS X. --- src/block/block_read.c | 4 ++++ src/block/block_write.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/block/block_read.c b/src/block/block_read.c index 6f0c41c1b5c..8ce1c8dbdaf 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -40,9 +40,13 @@ __wt_bm_preload( ret = block->fh->fh_map_preload(session, block->fh, (uint8_t *)bm->map + offset, size); else +#if defined(HAVE_POSIX_FADVISE) ret = block->fh->fh_advise(session, block->fh, (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED); +#else + ret = 0; +#endif if (ret == 0) return (0); diff --git a/src/block/block_write.c b/src/block/block_write.c index 134272b52f9..a4e4b39ec50 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -42,8 +42,10 @@ __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) return (0); block->os_cache = 0; +#if defined(HAVE_POSIX_FADVISE) WT_ERR(block->fh->fh_advise(session, block->fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)); +#endif return (0); err: /* Ignore ENOTSUP, but don't try again. 
*/ -- cgit v1.2.1 From f81a2adf42dc1e3e8215850a791e69133e0aeea7 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 1 Apr 2016 16:45:44 -0400 Subject: WT-2527: OS X compile error, missing POSIX_FADV_WILLNEED #define OS X needs fake POSIX_FADV_XXX #defines as well as Windows, move them to os.h. --- src/block/block_read.c | 4 ---- src/block/block_write.c | 2 -- src/include/msvc.h | 11 ----------- src/include/os.h | 12 ++++++++++++ 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/block/block_read.c b/src/block/block_read.c index 8ce1c8dbdaf..6f0c41c1b5c 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -40,13 +40,9 @@ __wt_bm_preload( ret = block->fh->fh_map_preload(session, block->fh, (uint8_t *)bm->map + offset, size); else -#if defined(HAVE_POSIX_FADVISE) ret = block->fh->fh_advise(session, block->fh, (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED); -#else - ret = 0; -#endif if (ret == 0) return (0); diff --git a/src/block/block_write.c b/src/block/block_write.c index a4e4b39ec50..134272b52f9 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -42,10 +42,8 @@ __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) return (0); block->os_cache = 0; -#if defined(HAVE_POSIX_FADVISE) WT_ERR(block->fh->fh_advise(session, block->fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)); -#endif return (0); err: /* Ignore ENOTSUP, but don't try again. */ diff --git a/src/include/msvc.h b/src/include/msvc.h index 222c24c3bc6..d5be5bd8c60 100644 --- a/src/include/msvc.h +++ b/src/include/msvc.h @@ -16,17 +16,6 @@ #define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */ #define WT_SIZET_FMT "Iu" /* size_t format string */ -/* - * The Windows fadvise calls will return ENOTSUP, but the WiredTiger code - * currently uses POSIX flags in the API. 
- */ -#ifndef POSIX_FADV_DONTNEED -#define POSIX_FADV_DONTNEED 0 -#endif -#ifndef POSIX_FADV_WILLNEED -#define POSIX_FADV_WILLNEED 0 -#endif - /* * Add MSVC-specific attributes and pragmas to types and function declarations. */ diff --git a/src/include/os.h b/src/include/os.h index 5034b17511d..af550b218b8 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -58,6 +58,18 @@ (t1).tv_nsec < (t2).tv_nsec ? -1 : \ (t1).tv_nsec == (t2).tv_nsec ? 0 : 1 : 1) +/* + * The underlying OS calls return ENOTSUP if posix_fadvise functionality isn't + * available, but the WiredTiger code uses the the POSIX flag names in the API. + * Use two values so the underlying code can distinguish. + */ +#ifndef POSIX_FADV_DONTNEED +#define POSIX_FADV_DONTNEED 0x01 +#endif +#ifndef POSIX_FADV_WILLNEED +#define POSIX_FADV_WILLNEED 0x02 +#endif + #define WT_OPEN_CREATE 0x001 /* Create is OK */ #define WT_OPEN_EXCLUSIVE 0x002 /* Exclusive open */ #define WT_OPEN_FIXED 0x004 /* Path isn't relative to home */ -- cgit v1.2.1 From 124c9d5ae0cb648dc6fc712fe3acc4e95d714aa6 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 1 Apr 2016 17:09:34 -0400 Subject: WT-2528: style error in WiredTiger build --- src/include/os.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/os.h b/src/include/os.h index af550b218b8..905f43c305e 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -60,8 +60,8 @@ /* * The underlying OS calls return ENOTSUP if posix_fadvise functionality isn't - * available, but the WiredTiger code uses the the POSIX flag names in the API. - * Use two values so the underlying code can distinguish. + * available, but WiredTiger uses the POSIX flag names in the API. Use distinct + * values so the underlying code can distinguish. 
*/ #ifndef POSIX_FADV_DONTNEED #define POSIX_FADV_DONTNEED 0x01 -- cgit v1.2.1 From db105a8b177ffc027624629eeae05007c8ae2967 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 4 Apr 2016 11:53:29 +1000 Subject: WT-2529 Move an assertion related to fsync and readonly connections. It is still reasonable to fsync the standard I/O channels in a read only connection. --- src/include/misc.i | 2 -- src/os_posix/os_fs.c | 2 ++ src/os_win/os_fs.c | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/include/misc.i b/src/include/misc.i index a96ce405c89..114b711ac88 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -273,8 +273,6 @@ __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) static inline int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_RET(__wt_verbose( session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->name)); diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c index 7d3049f6c2d..86aa8db8f4f 100644 --- a/src/os_posix/os_fs.c +++ b/src/os_posix/os_fs.c @@ -18,6 +18,8 @@ __posix_sync(WT_SESSION_IMPL *session, { WT_DECL_RET; + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + #ifdef HAVE_SYNC_FILE_RANGE if (!block) { WT_SYSCALL_RETRY(sync_file_range(fd, diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 7f2c797dbe4..462773cb9fb 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -395,6 +395,8 @@ __win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { WT_DECL_RET; + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + /* * We don't open Windows system handles when opening directories * for flushing, as it is not necessary (or possible) to flush -- cgit v1.2.1 From a7c59bddf2de5e58ad6f53a07fe6cec14a448d41 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 4 Apr 2016 11:55:12 +1000 Subject: WT-2529 Fixup readonly test to not mask exit error codes. 
The file system permissions cleanup was masking real failures. --- test/readonly/smoke.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/readonly/smoke.sh b/test/readonly/smoke.sh index 740deb5743a..8dba513e7af 100755 --- a/test/readonly/smoke.sh +++ b/test/readonly/smoke.sh @@ -1,6 +1,6 @@ #!/bin/sh -trap 'chmod -R u+w WT_*; exit 0' 0 1 2 3 13 15 +trap 'chmod -R u+w WT_*' 0 1 2 3 13 15 set -e -- cgit v1.2.1 From 659c977eb08d9c08ef903f5d876ac242daf424cc Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 4 Apr 2016 10:04:14 -0400 Subject: WT-2531: in-memory tables are wasting space in truncation The in-memory truncate call wasn't extending the file; fix it by setting the size of the WT_ITEM after the truncate call grows the buffer. --- src/os_common/os_fs_inmemory.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index 5b9c849d225..218deb15d29 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -334,17 +334,25 @@ __im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) * POSIX ftruncate. */ static int -__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset) { WT_DECL_RET; WT_IM *im; + size_t off; im = S2C(session)->inmemory; __wt_spin_lock(session, &im->lock); - WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)len)); - memset((uint8_t *) - fh->buf.mem + fh->buf.size, 0, fh->buf.memsize - fh->buf.size); + /* + * Grow the buffer as necessary, clear any new space in the file, + * and reset the file's data length.
+ */ + off = (size_t)offset; + WT_ERR(__wt_buf_grow(session, &fh->buf, off)); + if (fh->buf.size < off) + memset((uint8_t *) + fh->buf.data + fh->buf.size, 0, off - fh->buf.size); + fh->buf.size = off; err: __wt_spin_unlock(session, &im->lock); return (ret); -- cgit v1.2.1 From af4e3f14bd51f3c4149f64bcf4e2e249c61c5136 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 4 Apr 2016 10:13:09 -0400 Subject: WT-2531: in-memory tables are wasting space in truncation Don't truncate the file when running in-memory, it will allocate relatively large chunks of buffer memory in the underlying OS layer that we'll never use. --- src/block/block_ckpt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c index a0aadb43b93..a861a21876b 100644 --- a/src/block/block_ckpt.c +++ b/src/block/block_ckpt.c @@ -135,8 +135,11 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, * that was done when the checkpoint was first written (re-writing the * checkpoint might possibly make it relevant here, but it's unlikely * enough I don't bother). + * + * If in-memory, we don't read or write the object, and the truncate + * will unnecessarily allocate buffer space. */ - if (!checkpoint) { + if (!checkpoint && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) { /* * The truncate might fail if there's a file mapping (if there's * an open checkpoint on the file), that's OK. -- cgit v1.2.1 From aa0fe8d687acd6b6250eeb52ba166b93e9d547e6 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 4 Apr 2016 11:03:02 -0400 Subject: WT-2532: WT_STREAM_APPEND and WT_STREAM_LINE_BUFFER flag overlap. The WT_STREAM_APPEND and WT_STREAM_LINE_BUFFER flags have the same value. This means any stream opened for appending will also be line-buffered, and could possibly cause failure by appearing to set WT_STREAM_APPEND plus WT_STREAM_READ or WT_STREAM_WRITE, when only one can legally be specified. 
--- src/include/os.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/os.h b/src/include/os.h index 905f43c305e..2ff41d39f46 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -75,9 +75,9 @@ #define WT_OPEN_FIXED 0x004 /* Path isn't relative to home */ #define WT_OPEN_READONLY 0x008 /* Readonly open */ #define WT_STREAM_APPEND 0x010 /* Open a stream: append */ -#define WT_STREAM_LINE_BUFFER 0x010 /* Line buffer the stream */ -#define WT_STREAM_READ 0x020 /* Open a stream: read */ -#define WT_STREAM_WRITE 0x040 /* Open a stream: write */ +#define WT_STREAM_LINE_BUFFER 0x020 /* Line buffer the stream */ +#define WT_STREAM_READ 0x040 /* Open a stream: read */ +#define WT_STREAM_WRITE 0x080 /* Open a stream: write */ struct __wt_fh { const char *name; /* File name */ -- cgit v1.2.1 From 77caa9415b05f064d3c5e3c953e5ab7a2d7b7eec Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 4 Apr 2016 16:30:02 -0400 Subject: SERVER-23504: Coverity analysis defect 98177: Resource leak Don't leak the variable value if the call to __wt_fprintf fails. --- src/cursor/cur_backup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index 2ba73eb86c9..5be9b311a79 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -440,6 +440,7 @@ __backup_list_uri_append( WT_SESSION_IMPL *session, const char *name, bool *skip) { WT_CURSOR_BACKUP *cb; + WT_DECL_RET; char *value; cb = session->bkp_cursor; @@ -472,8 +473,9 @@ __backup_list_uri_append( /* Add the metadata entry to the backup file. */ WT_RET(__wt_metadata_search(session, name, &value)); - WT_RET(__wt_fprintf(session, cb->bfh, "%s\n%s\n", name, value)); + ret = __wt_fprintf(session, cb->bfh, "%s\n%s\n", name, value); __wt_free(session, value); + WT_RET(ret); /* Add file type objects to the list of files to be copied. 
*/ if (WT_PREFIX_MATCH(name, "file:")) -- cgit v1.2.1 From 95ffda62606c136fb571d9e15460a12b9c2d1074 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 5 Apr 2016 11:10:46 +1000 Subject: WT-2533 Don't let in-memory tables return a zero size. Returning a zero size breaks MongoDB replication. Return a non-zero size for now until SERVER-23526 is resolved. --- src/os_common/os_fs_inmemory.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index 218deb15d29..3b96e87157e 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -308,7 +308,12 @@ __im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) { WT_UNUSED(session); - *sizep = (wt_off_t)fh->buf.size; + /* + * XXX hack - MongoDB assumes that any file with content will have a + * non-zero size. In memory tables generally are zero-sized, make + * MongoDB happy. + */ + *sizep = fh->buf.size == 0 ? 0 : (wt_off_t)fh->buf.size; return (0); } -- cgit v1.2.1 From b53c3b161e1431fca9a453e3ab600a85357420b7 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 5 Apr 2016 11:14:35 +1000 Subject: WT-2533 Fix a typo in the last commit - actually return non-zero. --- src/os_common/os_fs_inmemory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index 3b96e87157e..45ba2ae8c4d 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -313,7 +313,7 @@ __im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) * non-zero size. In memory tables generally are zero-sized, make * MongoDB happy. */ - *sizep = fh->buf.size == 0 ? 0 : (wt_off_t)fh->buf.size; + *sizep = fh->buf.size == 0 ? 
1024 : (wt_off_t)fh->buf.size; return (0); } -- cgit v1.2.1 From a81c5bed72a9660406a8c7cc261a616fff9e8e02 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 5 Apr 2016 11:23:34 +1000 Subject: WT-2533 Make __im_file_size consistent with __im_handle_size. --- src/os_common/os_fs_inmemory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index 45ba2ae8c4d..49600557d10 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -151,12 +151,12 @@ __im_file_size( __wt_spin_lock(session, &im->lock); if (__wt_handle_search(session, name, true, NULL, &fh)) { - *sizep = (wt_off_t)fh->buf.size; - ret = __wt_close(session, &fh); + WT_ERR(__im_handle_size(session, fh, sizep)); + WT_ERR(__wt_close(session, &fh)); } else ret = ENOENT; - __wt_spin_unlock(session, &im->lock); +err: __wt_spin_unlock(session, &im->lock); return (ret); } -- cgit v1.2.1 From c31217f30fafde000239a93adcfe960546f13943 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 5 Apr 2016 11:42:24 +1000 Subject: WT-2533 Fix compiler error re: function prototypes. --- src/os_common/os_fs_inmemory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index 49600557d10..260514eac66 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -8,6 +8,8 @@ #include "wt_internal.h" +static int __im_handle_size(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); + /* * In-memory information. */ -- cgit v1.2.1