diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2014-02-27 13:00:01 +1100 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2014-02-27 13:00:01 +1100 |
commit | 55e1132fd2e77a2e20ed477d834d4e760ad3ab7b (patch) | |
tree | 0eced28f96af7547ccb122c6b0689032fe6217bc | |
parent | c4787a483ac17df3cd0d5f9753e25db854af7c25 (diff) | |
parent | 9184b4247673a11c4ffce6eb49ab37a1a9db83c0 (diff) | |
download | mongo-55e1132fd2e77a2e20ed477d834d4e760ad3ab7b.tar.gz |
Merge pull request #847 from wiredtiger/checkpoint-directio
Do direct I/O reads on checkpoints to reduce the VM pressure of LSM workloads
-rw-r--r-- | dist/api_data.py | 9 | ||||
-rw-r--r-- | dist/flags.py | 1 | ||||
-rw-r--r-- | src/block/block_mgr.c | 9 | ||||
-rw-r--r-- | src/block/block_open.c | 6 | ||||
-rw-r--r-- | src/btree/bt_handle.c | 4 | ||||
-rw-r--r-- | src/config/config_def.c | 4 | ||||
-rw-r--r-- | src/conn/conn_api.c | 1 | ||||
-rw-r--r-- | src/include/extern.h | 2 | ||||
-rw-r--r-- | src/include/flags.h | 1 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 8 | ||||
-rw-r--r-- | src/os_posix/os_open.c | 9 | ||||
-rw-r--r-- | src/schema/schema_create.c | 3 |
12 files changed, 40 insertions, 17 deletions
diff --git a/dist/api_data.py b/dist/api_data.py index 5509032d148..412f0603efd 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -613,8 +613,13 @@ methods = { Use \c O_DIRECT to access files. Options are given as a list, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io requires care, see @ref - tuning_system_buffer_cache_direct_io for important warnings''', - type='list', choices=['data', 'log']), + tuning_system_buffer_cache_direct_io for important warnings. + Including \c "data" will cause WiredTiger data files to use + \c O_DIRECT, including \c "log" will cause WiredTiger log files + to use \c O_DIRECT, and including \c "checkpoint" will cause + WiredTiger data files opened at a checkpoint (i.e: read only) to + use \c O_DIRECT''', + type='list', choices=['checkpoint', 'data', 'log']), Config('extensions', '', r''' list of shared library extensions to load (using dlopen). Any values specified to an library extension are passed to diff --git a/dist/flags.py b/dist/flags.py index 7ca1ee3144d..2a6adfccf43 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -15,6 +15,7 @@ flags = { 'SYNC_WRITE_LEAVES', ], 'file_types' : [ + 'FILE_TYPE_CHECKPOINT', 'FILE_TYPE_DATA', 'FILE_TYPE_LOG' ], diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index 04c24a6a3b6..35201008622 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -405,8 +405,9 @@ __bm_method_set(WT_BM *bm, int readonly) * Open a file. */ int -__wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, - const char *cfg[], int forced_salvage, uint32_t allocsize, WT_BM **bmp) +__wt_block_manager_open(WT_SESSION_IMPL *session, + const char *filename, const char *cfg[], + int forced_salvage, int readonly, uint32_t allocsize, WT_BM **bmp) { WT_BM *bm; WT_DECL_RET; @@ -416,8 +417,8 @@ __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, WT_RET(__wt_calloc_def(session, 1, &bm)); __bm_method_set(bm, 0); - WT_ERR(__wt_block_open( - session, filename, cfg, forced_salvage, allocsize, &bm->block)); + WT_ERR(__wt_block_open(session, filename, cfg, + forced_salvage, readonly, allocsize, &bm->block)); *bmp = bm; return (0); diff --git a/src/block/block_open.c b/src/block/block_open.c index 1132cb85a6c..b172d0bb009 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -95,7 +95,7 @@ __block_destroy(WT_SESSION_IMPL *session, WT_BLOCK *block) int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], - int forced_salvage, uint32_t allocsize, WT_BLOCK **blockp) + int forced_salvage, int readonly, uint32_t allocsize, WT_BLOCK **blockp) { WT_BLOCK *block; WT_CONFIG_ITEM cval; @@ -159,7 +159,9 @@ __wt_block_open(WT_SESSION_IMPL *session, /* Open the underlying file handle. */ WT_ERR(__wt_open( - session, filename, 0, 0, WT_FILE_TYPE_DATA, &block->fh)); + session, filename, 0, 0, + readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA, + &block->fh)); /* Initialize the live checkpoint's lock. */ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager")); diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 2da886b4423..f6cc4cc6fb3 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -69,8 +69,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) if (!WT_PREFIX_SKIP(filename, "file:")) WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI"); - WT_ERR(__wt_block_manager_open(session, filename, - dhandle->cfg, forced_salvage, btree->allocsize, &btree->bm)); + WT_ERR(__wt_block_manager_open(session, filename, dhandle->cfg, + forced_salvage, readonly, btree->allocsize, &btree->bm)); bm = btree->bm; /* diff --git a/src/config/config_def.c b/src/config/config_def.c index abe6713696c..2c01cac1a85 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -250,7 +250,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { , { "checkpoint_sync", "boolean", NULL, NULL}, { "create", "boolean", NULL, NULL}, - { "direct_io", "list", "choices=[\"data\",\"log\"]", NULL}, + { "direct_io", "list", + "choices=[\"checkpoint\",\"data\",\"log\"]", + NULL}, { "error_prefix", "string", NULL, NULL}, { "eviction_dirty_target", "int", "min=10,max=99", NULL}, { "eviction_target", "int", "min=10,max=99", NULL}, diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index bcbb8e3ceb8..b1992793827 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1024,6 +1024,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *name; uint32_t flag; } *ft, file_types[] = { + { "checkpoint", WT_FILE_TYPE_CHECKPOINT }, { "data", WT_FILE_TYPE_DATA }, { "log", WT_FILE_TYPE_LOG }, { NULL, 0 } diff --git a/src/include/extern.h b/src/include/extern.h index bd24cb02d61..ba026253a5b 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -126,6 +126,7 @@ extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], int forced_salvage, + int readonly, uint32_t allocsize, WT_BM **bmp); extern int __wt_block_manager_truncate( WT_SESSION_IMPL *session, @@ -138,6 +139,7 @@ extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], int forced_salvage, + int readonly, uint32_t allocsize, WT_BLOCK **blockp); extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); diff --git a/src/include/flags.h b/src/include/flags.h index 31eed83e351..89f2450f3af 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -10,6 +10,7 @@ #define WT_CONN_PANIC 0x00000002 #define WT_CONN_SERVER_RUN 0x00000001 #define WT_EVICTION_SERVER_LOCKED 0x00000004 +#define WT_FILE_TYPE_CHECKPOINT 0x00000004 #define WT_FILE_TYPE_DATA 0x00000002 #define WT_FILE_TYPE_LOG 0x00000001 #define WT_LOGSCAN_FIRST 0x00000008 diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 8bcdc431bd9..6334bca2061 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1493,8 +1493,12 @@ struct __wt_connection { * @config{direct_io, Use \c O_DIRECT to access files. Options are given as a * list\, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io * requires care\, see @ref tuning_system_buffer_cache_direct_io for important - * warnings., a list\, with values chosen from the following options: \c - * "data"\, \c "log"; default empty.} + * warnings. Including \c "data" will cause WiredTiger data files to use \c + * O_DIRECT\, including \c "log" will cause WiredTiger log files to use \c + * O_DIRECT\, and including \c "checkpoint" will cause WiredTiger data files + * opened at a checkpoint (i.e: read only) to use \c O_DIRECT., a list\, with + * values chosen from the following options: \c "checkpoint"\, \c "data"\, \c + * "log"; default empty.} * @config{error_prefix, prefix string for error messages., a string; default * empty.} * @config{eviction_dirty_target, continue evicting until the cache has less diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 6ef4caadd0a..c6938dad9fd 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -107,7 +107,8 @@ __wt_open(WT_SESSION_IMPL *session, #endif #ifdef O_NOATIME /* Avoid updating metadata for read-only workloads. */ - if (dio_type == WT_FILE_TYPE_DATA) + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) f |= O_NOATIME; #endif @@ -157,7 +158,8 @@ __wt_open(WT_SESSION_IMPL *session, #if defined(HAVE_POSIX_FADVISE) /* Disable read-ahead on trees: it slows down random read workloads. */ - if (dio_type == WT_FILE_TYPE_DATA) + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM)); #endif @@ -174,7 +176,8 @@ __wt_open(WT_SESSION_IMPL *session, WT_ERR(__wt_filesize(session, fh, &fh->size)); /* Configure file extension. */ - if (dio_type == WT_FILE_TYPE_DATA) + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) fh->extend_len = conn->data_extend_len; /* diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index 8d620198c09..8bd78ce0ac4 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -33,7 +33,8 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session, * if you configure direct I/O and then don't do I/O in alignments and * units of its happy place. */ - if (FLD_ISSET(conn->direct_io, WT_FILE_TYPE_DATA)) { + if (FLD_ISSET(conn->direct_io, + WT_FILE_TYPE_CHECKPOINT | WT_FILE_TYPE_DATA)) { align = (int64_t)conn->buffer_alignment; if (align != 0 && (cval.val < align || cval.val % align != 0)) WT_RET_MSG(session, EINVAL, |