summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Gorrod <alexg@wiredtiger.com> 2014-02-27 13:00:01 +1100
committer: Alex Gorrod <alexg@wiredtiger.com> 2014-02-27 13:00:01 +1100
commit: 55e1132fd2e77a2e20ed477d834d4e760ad3ab7b (patch)
tree: 0eced28f96af7547ccb122c6b0689032fe6217bc
parent: c4787a483ac17df3cd0d5f9753e25db854af7c25 (diff)
parent: 9184b4247673a11c4ffce6eb49ab37a1a9db83c0 (diff)
download: mongo-55e1132fd2e77a2e20ed477d834d4e760ad3ab7b.tar.gz
Merge pull request #847 from wiredtiger/checkpoint-directio
Do direct I/O reads on checkpoints to reduce the VM pressure of LSM workloads
-rw-r--r-- dist/api_data.py 9
-rw-r--r-- dist/flags.py 1
-rw-r--r-- src/block/block_mgr.c 9
-rw-r--r-- src/block/block_open.c 6
-rw-r--r-- src/btree/bt_handle.c 4
-rw-r--r-- src/config/config_def.c 4
-rw-r--r-- src/conn/conn_api.c 1
-rw-r--r-- src/include/extern.h 2
-rw-r--r-- src/include/flags.h 1
-rw-r--r-- src/include/wiredtiger.in 8
-rw-r--r-- src/os_posix/os_open.c 9
-rw-r--r-- src/schema/schema_create.c 3
12 files changed, 40 insertions, 17 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index 5509032d148..412f0603efd 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -613,8 +613,13 @@ methods = {
Use \c O_DIRECT to access files. Options are given as a list,
such as <code>"direct_io=[data]"</code>. Configuring
\c direct_io requires care, see @ref
- tuning_system_buffer_cache_direct_io for important warnings''',
- type='list', choices=['data', 'log']),
+ tuning_system_buffer_cache_direct_io for important warnings.
+ Including \c "data" will cause WiredTiger data files to use
+ \c O_DIRECT, including \c "log" will cause WiredTiger log files
+ to use \c O_DIRECT, and including \c "checkpoint" will cause
+ WiredTiger data files opened at a checkpoint (i.e: read only) to
+ use \c O_DIRECT''',
+ type='list', choices=['checkpoint', 'data', 'log']),
Config('extensions', '', r'''
list of shared library extensions to load (using dlopen).
Any values specified to an library extension are passed to
diff --git a/dist/flags.py b/dist/flags.py
index 7ca1ee3144d..2a6adfccf43 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -15,6 +15,7 @@ flags = {
'SYNC_WRITE_LEAVES',
],
'file_types' : [
+ 'FILE_TYPE_CHECKPOINT',
'FILE_TYPE_DATA',
'FILE_TYPE_LOG'
],
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 04c24a6a3b6..35201008622 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -405,8 +405,9 @@ __bm_method_set(WT_BM *bm, int readonly)
* Open a file.
*/
int
-__wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename,
- const char *cfg[], int forced_salvage, uint32_t allocsize, WT_BM **bmp)
+__wt_block_manager_open(WT_SESSION_IMPL *session,
+ const char *filename, const char *cfg[],
+ int forced_salvage, int readonly, uint32_t allocsize, WT_BM **bmp)
{
WT_BM *bm;
WT_DECL_RET;
@@ -416,8 +417,8 @@ __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename,
WT_RET(__wt_calloc_def(session, 1, &bm));
__bm_method_set(bm, 0);
- WT_ERR(__wt_block_open(
- session, filename, cfg, forced_salvage, allocsize, &bm->block));
+ WT_ERR(__wt_block_open(session, filename, cfg,
+ forced_salvage, readonly, allocsize, &bm->block));
*bmp = bm;
return (0);
diff --git a/src/block/block_open.c b/src/block/block_open.c
index 1132cb85a6c..b172d0bb009 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -95,7 +95,7 @@ __block_destroy(WT_SESSION_IMPL *session, WT_BLOCK *block)
int
__wt_block_open(WT_SESSION_IMPL *session,
const char *filename, const char *cfg[],
- int forced_salvage, uint32_t allocsize, WT_BLOCK **blockp)
+ int forced_salvage, int readonly, uint32_t allocsize, WT_BLOCK **blockp)
{
WT_BLOCK *block;
WT_CONFIG_ITEM cval;
@@ -159,7 +159,9 @@ __wt_block_open(WT_SESSION_IMPL *session,
/* Open the underlying file handle. */
WT_ERR(__wt_open(
- session, filename, 0, 0, WT_FILE_TYPE_DATA, &block->fh));
+ session, filename, 0, 0,
+ readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA,
+ &block->fh));
/* Initialize the live checkpoint's lock. */
WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 2da886b4423..f6cc4cc6fb3 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -69,8 +69,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
if (!WT_PREFIX_SKIP(filename, "file:"))
WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI");
- WT_ERR(__wt_block_manager_open(session, filename,
- dhandle->cfg, forced_salvage, btree->allocsize, &btree->bm));
+ WT_ERR(__wt_block_manager_open(session, filename, dhandle->cfg,
+ forced_salvage, readonly, btree->allocsize, &btree->bm));
bm = btree->bm;
/*
diff --git a/src/config/config_def.c b/src/config/config_def.c
index abe6713696c..2c01cac1a85 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -250,7 +250,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
,
{ "checkpoint_sync", "boolean", NULL, NULL},
{ "create", "boolean", NULL, NULL},
- { "direct_io", "list", "choices=[\"data\",\"log\"]", NULL},
+ { "direct_io", "list",
+ "choices=[\"checkpoint\",\"data\",\"log\"]",
+ NULL},
{ "error_prefix", "string", NULL, NULL},
{ "eviction_dirty_target", "int", "min=10,max=99", NULL},
{ "eviction_target", "int", "min=10,max=99", NULL},
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index bcbb8e3ceb8..b1992793827 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -1024,6 +1024,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
const char *name;
uint32_t flag;
} *ft, file_types[] = {
+ { "checkpoint", WT_FILE_TYPE_CHECKPOINT },
{ "data", WT_FILE_TYPE_DATA },
{ "log", WT_FILE_TYPE_LOG },
{ NULL, 0 }
diff --git a/src/include/extern.h b/src/include/extern.h
index bd24cb02d61..ba026253a5b 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -126,6 +126,7 @@ extern int __wt_block_manager_open(WT_SESSION_IMPL *session,
const char *filename,
const char *cfg[],
int forced_salvage,
+ int readonly,
uint32_t allocsize,
WT_BM **bmp);
extern int __wt_block_manager_truncate( WT_SESSION_IMPL *session,
@@ -138,6 +139,7 @@ extern int __wt_block_open(WT_SESSION_IMPL *session,
const char *filename,
const char *cfg[],
int forced_salvage,
+ int readonly,
uint32_t allocsize,
WT_BLOCK **blockp);
extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block);
diff --git a/src/include/flags.h b/src/include/flags.h
index 31eed83e351..89f2450f3af 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -10,6 +10,7 @@
#define WT_CONN_PANIC 0x00000002
#define WT_CONN_SERVER_RUN 0x00000001
#define WT_EVICTION_SERVER_LOCKED 0x00000004
+#define WT_FILE_TYPE_CHECKPOINT 0x00000004
#define WT_FILE_TYPE_DATA 0x00000002
#define WT_FILE_TYPE_LOG 0x00000001
#define WT_LOGSCAN_FIRST 0x00000008
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 8bcdc431bd9..6334bca2061 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -1493,8 +1493,12 @@ struct __wt_connection {
* @config{direct_io, Use \c O_DIRECT to access files. Options are given as a
* list\, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io
* requires care\, see @ref tuning_system_buffer_cache_direct_io for important
- * warnings., a list\, with values chosen from the following options: \c
- * "data"\, \c "log"; default empty.}
+ * warnings. Including \c "data" will cause WiredTiger data files to use \c
+ * O_DIRECT\, including \c "log" will cause WiredTiger log files to use \c
+ * O_DIRECT\, and including \c "checkpoint" will cause WiredTiger data files
+ * opened at a checkpoint (i.e: read only) to use \c O_DIRECT., a list\, with
+ * values chosen from the following options: \c "checkpoint"\, \c "data"\, \c
+ * "log"; default empty.}
* @config{error_prefix, prefix string for error messages., a string; default
* empty.}
* @config{eviction_dirty_target, continue evicting until the cache has less
diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c
index 6ef4caadd0a..c6938dad9fd 100644
--- a/src/os_posix/os_open.c
+++ b/src/os_posix/os_open.c
@@ -107,7 +107,8 @@ __wt_open(WT_SESSION_IMPL *session,
#endif
#ifdef O_NOATIME
/* Avoid updating metadata for read-only workloads. */
- if (dio_type == WT_FILE_TYPE_DATA)
+ if (dio_type == WT_FILE_TYPE_DATA ||
+ dio_type == WT_FILE_TYPE_CHECKPOINT)
f |= O_NOATIME;
#endif
@@ -157,7 +158,8 @@ __wt_open(WT_SESSION_IMPL *session,
#if defined(HAVE_POSIX_FADVISE)
/* Disable read-ahead on trees: it slows down random read workloads. */
- if (dio_type == WT_FILE_TYPE_DATA)
+ if (dio_type == WT_FILE_TYPE_DATA ||
+ dio_type == WT_FILE_TYPE_CHECKPOINT)
WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM));
#endif
@@ -174,7 +176,8 @@ __wt_open(WT_SESSION_IMPL *session,
WT_ERR(__wt_filesize(session, fh, &fh->size));
/* Configure file extension. */
- if (dio_type == WT_FILE_TYPE_DATA)
+ if (dio_type == WT_FILE_TYPE_DATA ||
+ dio_type == WT_FILE_TYPE_CHECKPOINT)
fh->extend_len = conn->data_extend_len;
/*
diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c
index 8d620198c09..8bd78ce0ac4 100644
--- a/src/schema/schema_create.c
+++ b/src/schema/schema_create.c
@@ -33,7 +33,8 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session,
* if you configure direct I/O and then don't do I/O in alignments and
* units of its happy place.
*/
- if (FLD_ISSET(conn->direct_io, WT_FILE_TYPE_DATA)) {
+ if (FLD_ISSET(conn->direct_io,
+ WT_FILE_TYPE_CHECKPOINT | WT_FILE_TYPE_DATA)) {
align = (int64_t)conn->buffer_alignment;
if (align != 0 && (cval.val < align || cval.val % align != 0))
WT_RET_MSG(session, EINVAL,