diff options
author | sueloverso <sue@mongodb.com> | 2016-12-04 23:22:58 -0500 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2016-12-05 15:22:58 +1100 |
commit | 853430ea86b8e29cdfa9de34606405d52384d2db (patch) | |
tree | 60b562016008db77e3f610ad0067e3c40883fdd5 | |
parent | d2dfe81c95cb2cd2c04cf7e7711c8ad247d63ce1 (diff) | |
download | mongo-853430ea86b8e29cdfa9de34606405d52384d2db.tar.gz |
WT-2670 Add access_pattern_hint configuration for tables (#3155)
-rw-r--r-- | dist/api_config.py | 3 | ||||
-rw-r--r-- | dist/api_data.py | 8 | ||||
-rw-r--r-- | src/block/block_open.c | 6 | ||||
-rw-r--r-- | src/config/config_def.c | 105 | ||||
-rw-r--r-- | src/docs/upgrading.dox | 9 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 23 | ||||
-rw-r--r-- | src/os_posix/os_fs.c | 16 | ||||
-rw-r--r-- | src/os_win/os_fs.c | 10 |
8 files changed, 121 insertions, 59 deletions
diff --git a/dist/api_config.py b/dist/api_config.py index 1069a7037ce..d83a632321e 100644 --- a/dist/api_config.py +++ b/dist/api_config.py @@ -198,7 +198,8 @@ def get_default(c): return 'false' elif c.default == 'true': return 'true' - elif t == 'string' and c.default == 'none': + elif t == 'string' and c.default == 'none' and \ + not c.flags.get('choices', []): return '' elif t == 'category': return '(%s)' % (','.join('%s=%s' % (subc.name, get_default(subc)) diff --git a/dist/api_data.py b/dist/api_data.py index aa76ff45d1a..2d4371923e1 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -120,6 +120,14 @@ lsm_config = [ # Per-file configuration file_config = format_meta + [ + Config('access_pattern_hint', 'none', r''' + It is recommended that workloads that consist primarily of + updates and/or point queries specify \c random. Workloads that + do many cursor scans through large ranges of data specify + \c sequential and other workloads specify \c none. The + option leads to an advisory call to an appropriate operating + system API where available''', + choices=['none', 'random', 'sequential']), Config('block_allocation', 'best', r''' configure block allocation. Permitted values are \c "first" or \c "best"; the \c "first" configuration uses a first-available diff --git a/src/block/block_open.c b/src/block/block_open.c index 4d15942709a..07ceb4c8159 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -201,6 +201,12 @@ __wt_block_open(WT_SESSION_IMPL *session, * "direct_io=checkpoint" configures direct I/O for readonly data files. */ flags = 0; + WT_ERR(__wt_config_gets(session, cfg, "access_pattern_hint", &cval)); + if (WT_STRING_MATCH("random", cval.str, cval.len)) + LF_SET(WT_FS_OPEN_ACCESS_RAND); + else if (WT_STRING_MATCH("sequential", cval.str, cval.len)) + LF_SET(WT_FS_OPEN_ACCESS_SEQ); + if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT)) LF_SET(WT_FS_OPEN_DIRECTIO); if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA)) diff --git a/src/config/config_def.c b/src/config/config_def.c index b3b900f8c42..9d886cbf0bd 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -224,6 +224,9 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { + { "access_pattern_hint", "string", + NULL, "choices=[\"none\",\"random\",\"sequential\"]", + NULL, 0 }, { "allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0 }, @@ -401,6 +404,9 @@ static const WT_CONFIG_CHECK confchk_colgroup_meta[] = { }; static const WT_CONFIG_CHECK confchk_file_config[] = { + { "access_pattern_hint", "string", + NULL, "choices=[\"none\",\"random\",\"sequential\"]", + NULL, 0 }, { "allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0 }, @@ -459,6 +465,9 @@ static const WT_CONFIG_CHECK confchk_file_config[] = { }; static const WT_CONFIG_CHECK confchk_file_meta[] = { + { "access_pattern_hint", "string", + NULL, "choices=[\"none\",\"random\",\"sequential\"]", + NULL, 0 }, { "allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0 }, @@ -537,6 +546,9 @@ static const WT_CONFIG_CHECK confchk_index_meta[] = { }; static const WT_CONFIG_CHECK confchk_lsm_meta[] = { + { "access_pattern_hint", "string", + NULL, "choices=[\"none\",\"random\",\"sequential\"]", + NULL, 0 }, { "allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0 }, @@ -1075,12 +1087,13 @@ static const WT_CONFIG_ENTRY config_entries[] = { confchk_WT_SESSION_compact, 1 }, { "WT_SESSION.create", - "allocation_size=4KB,app_metadata=,block_allocation=best," - "block_compressor=,cache_resident=false,checksum=uncompressed," - "colgroups=,collator=,columns=,dictionary=0,encryption=(keyid=," - "name=),exclusive=false,extractor=,format=btree,huffman_key=," - "huffman_value=,ignore_in_memory_cache_size=false,immutable=false" - ",internal_item_max=0,internal_key_max=0," + "access_pattern_hint=none,allocation_size=4KB,app_metadata=," + "block_allocation=best,block_compressor=,cache_resident=false," + "checksum=uncompressed,colgroups=,collator=,columns=,dictionary=0" + ",encryption=(keyid=,name=),exclusive=false,extractor=," + "format=btree,huffman_key=,huffman_value=," + "ignore_in_memory_cache_size=false,immutable=false," + "internal_item_max=0,internal_key_max=0," "internal_key_truncate=true,internal_page_max=4KB,key_format=u," "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," "leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true," @@ -1090,7 +1103,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,source=,split_deepen_min_child=0," "split_deepen_per_child=0,split_pct=75,type=file,value_format=u", - confchk_WT_SESSION_create, 41 + confchk_WT_SESSION_create, 42 }, { "WT_SESSION.drop", "checkpoint_wait=true,force=false,lock_wait=true," @@ -1172,10 +1185,26 @@ static const WT_CONFIG_ENTRY config_entries[] = { confchk_colgroup_meta, 5 }, { "file.config", - "allocation_size=4KB,app_metadata=,block_allocation=best," - "block_compressor=,cache_resident=false,checksum=uncompressed," - "collator=,columns=,dictionary=0,encryption=(keyid=,name=)," - "format=btree,huffman_key=,huffman_value=," + "access_pattern_hint=none,allocation_size=4KB,app_metadata=," + "block_allocation=best,block_compressor=,cache_resident=false," + "checksum=uncompressed,collator=,columns=,dictionary=0," + "encryption=(keyid=,name=),format=btree,huffman_key=," + "huffman_value=,ignore_in_memory_cache_size=false," + "internal_item_max=0,internal_key_max=0," + "internal_key_truncate=true,internal_page_max=4KB,key_format=u," + "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," + "leaf_value_max=0,log=(enabled=true),memory_page_max=5MB," + "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," + "prefix_compression_min=4,split_deepen_min_child=0," + "split_deepen_per_child=0,split_pct=75,value_format=u", + confchk_file_config, 35 + }, + { "file.meta", + "access_pattern_hint=none,allocation_size=4KB,app_metadata=," + "block_allocation=best,block_compressor=,cache_resident=false," + "checkpoint=,checkpoint_lsn=,checksum=uncompressed,collator=," + "columns=,dictionary=0,encryption=(keyid=,name=),format=btree," + "huffman_key=,huffman_value=,id=," "ignore_in_memory_cache_size=false,internal_item_max=0," "internal_key_max=0,internal_key_truncate=true," "internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0," @@ -1183,24 +1212,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { "log=(enabled=true),memory_page_max=5MB,os_cache_dirty_max=0," "os_cache_max=0,prefix_compression=false,prefix_compression_min=4" ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," - "value_format=u", - confchk_file_config, 34 - }, - { "file.meta", - "allocation_size=4KB,app_metadata=,block_allocation=best," - "block_compressor=,cache_resident=false,checkpoint=," - "checkpoint_lsn=,checksum=uncompressed,collator=,columns=," - "dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key=" - ",huffman_value=,id=,ignore_in_memory_cache_size=false," - "internal_item_max=0,internal_key_max=0," - "internal_key_truncate=true,internal_page_max=4KB,key_format=u," - "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," - "leaf_value_max=0,log=(enabled=true),memory_page_max=5MB," - "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," - "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,value_format=u," - "version=(major=0,minor=0)", - confchk_file_meta, 38 + "value_format=u,version=(major=0,minor=0)", + confchk_file_meta, 39 }, { "index.meta", "app_metadata=,collator=,columns=,extractor=,immutable=false," @@ -1208,23 +1221,23 @@ static const WT_CONFIG_ENTRY config_entries[] = { confchk_index_meta, 10 }, { "lsm.meta", - "allocation_size=4KB,app_metadata=,block_allocation=best," - "block_compressor=,cache_resident=false,checksum=uncompressed," - "chunks=,collator=,columns=,dictionary=0,encryption=(keyid=," - "name=),format=btree,huffman_key=,huffman_value=," - "ignore_in_memory_cache_size=false,internal_item_max=0," - "internal_key_max=0,internal_key_truncate=true," - "internal_page_max=4KB,key_format=u,key_gap=10,last=," - "leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," - "leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true," - "bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8," - "bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB," - "chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB," - "old_chunks=,os_cache_dirty_max=0,os_cache_max=0," - "prefix_compression=false,prefix_compression_min=4," - "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," - "value_format=u", - confchk_lsm_meta, 38 + "access_pattern_hint=none,allocation_size=4KB,app_metadata=," + "block_allocation=best,block_compressor=,cache_resident=false," + "checksum=uncompressed,chunks=,collator=,columns=,dictionary=0," + "encryption=(keyid=,name=),format=btree,huffman_key=," + "huffman_value=,ignore_in_memory_cache_size=false," + "internal_item_max=0,internal_key_max=0," + "internal_key_truncate=true,internal_page_max=4KB,key_format=u," + "key_gap=10,last=,leaf_item_max=0,leaf_key_max=0," + "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=true)," + "lsm=(auto_throttle=true,bloom=true,bloom_bit_count=16," + "bloom_config=,bloom_hash_count=8,bloom_oldest=false," + "chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,merge_max=15," + "merge_min=0),memory_page_max=5MB,old_chunks=," + "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," + "prefix_compression_min=4,split_deepen_min_child=0," + "split_deepen_per_child=0,split_pct=75,value_format=u", + confchk_lsm_meta, 39 }, { "table.meta", "app_metadata=,colgroups=,collator=,columns=,key_format=u," diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index caa6b036892..c173bebdda6 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -17,6 +17,15 @@ how much work is done at the beginning of a checkpoint to make the critical section of checkpoints complete more quickly. </dd> +<dt>Change to default fadvise setting for data files</dt> +<dd> +The default behavior for data files was to advise the file system to optimize +for random access on POSIX and Windows platforms. The default is now to not +advise about access patterns. There is a new \c access_pattern_hint +configuration string available to WT_SESSION::create that can be used +to configure the old default behavior. +</dd> + <dt>Checkpoint server created checkpoint names</dt> <dd> The ::wiredtiger_open checkpoint configuration no longer supports the diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 37788096f39..2365135e08d 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -995,6 +995,13 @@ struct __wt_session { * \c "table:stock". For a description of URI formats * see @ref data_sources. * @configstart{WT_SESSION.create, see dist/api_data.py} + * @config{access_pattern_hint, It is recommended that workloads that + * consist primarily of updates and/or point queries specify \c random. + * Workloads that do many cursor scans through large ranges of data + * specify \c sequential and other workloads specify \c none. The + * option leads to an advisory call to an appropriate operating system + * API where available., a string\, chosen from the following options: + * \c "none"\, \c "random"\, \c "sequential"; default \c none.} * @config{allocation_size, the file unit allocation size\, in bytes\, * must a power-of-two; smaller values decrease the file space required * by overflow items\, and the default value of 4KB is a good choice @@ -3734,21 +3741,25 @@ typedef enum { WT_FS_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */ } WT_FS_OPEN_FILE_TYPE; +/*! WT_FILE_SYSTEM::open_file flags: random access pattern */ +#define WT_FS_OPEN_ACCESS_RAND 0x001 +/*! WT_FILE_SYSTEM::open_file flags: sequential access pattern */ +#define WT_FS_OPEN_ACCESS_SEQ 0x002 /*! WT_FILE_SYSTEM::open_file flags: create if does not exist */ -#define WT_FS_OPEN_CREATE 0x001 +#define WT_FS_OPEN_CREATE 0x004 /*! WT_FILE_SYSTEM::open_file flags: direct I/O requested */ -#define WT_FS_OPEN_DIRECTIO 0x002 +#define WT_FS_OPEN_DIRECTIO 0x008 /*! WT_FILE_SYSTEM::open_file flags: file creation must be durable */ -#define WT_FS_OPEN_DURABLE 0x004 +#define WT_FS_OPEN_DURABLE 0x010 /*! * WT_FILE_SYSTEM::open_file flags: return EBUSY if exclusive use not available */ -#define WT_FS_OPEN_EXCLUSIVE 0x008 +#define WT_FS_OPEN_EXCLUSIVE 0x020 #ifndef DOXYGEN -#define WT_FS_OPEN_FIXED 0x010 /* Path not home relative (internal) */ +#define WT_FS_OPEN_FIXED 0x040 /* Path not home relative (internal) */ #endif /*! WT_FILE_SYSTEM::open_file flags: open is read-only */ -#define WT_FS_OPEN_READONLY 0x020 +#define WT_FS_OPEN_READONLY 0x080 /*! * WT_FILE_SYSTEM::remove or WT_FILE_SYSTEM::rename flags: the remove or rename diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c index 5f06892ce6e..26be0f049cc 100644 --- a/src/os_posix/os_fs.c +++ b/src/os_posix/os_fs.c @@ -575,7 +575,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, WT_FILE_HANDLE_POSIX *pfh; WT_SESSION_IMPL *session; mode_t mode; - int f; + int advise_flag, f; WT_UNUSED(file_system); @@ -676,17 +676,25 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, #if defined(HAVE_POSIX_FADVISE) /* - * Disable read-ahead on trees: it slows down random read workloads. + * If the user set an access pattern hint, call fadvise now. * Ignore fadvise when doing direct I/O, the kernel cache isn't * interesting. */ - if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA) { + if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA && + LF_ISSET(WT_FS_OPEN_ACCESS_RAND | WT_FS_OPEN_ACCESS_SEQ)) { + advise_flag = 0; + if (LF_ISSET(WT_FS_OPEN_ACCESS_RAND)) + advise_flag = POSIX_FADV_RANDOM; + if (LF_ISSET(WT_FS_OPEN_ACCESS_SEQ)) + advise_flag = POSIX_FADV_SEQUENTIAL; WT_SYSCALL( - posix_fadvise(pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret); + posix_fadvise(pfh->fd, 0, 0, advise_flag), ret); if (ret != 0) WT_ERR_MSG(session, ret, "%s: handle-open: posix_fadvise", name); } +#else + WT_UNUSED(advise_flag); #endif directory_open: diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 7ab7178114b..6c74f2f411f 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -521,10 +521,16 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) f |= FILE_FLAG_WRITE_THROUGH; - /* Disable read-ahead on trees: it slows down random read workloads. */ - if (file_type == WT_FS_OPEN_FILE_TYPE_DATA) + /* If the user indicated a random workload, disable read-ahead. */ + if (file_type == WT_FS_OPEN_FILE_TYPE_DATA && + LF_ISSET(WT_FS_OPEN_ACCESS_RAND)) f |= FILE_FLAG_RANDOM_ACCESS; + /* If the user indicated a sequential workload, set that. */ + if (file_type == WT_FS_OPEN_FILE_TYPE_DATA && + LF_ISSET(WT_FS_OPEN_ACCESS_SEQ)) + f |= FILE_FLAG_SEQUENTIAL_SCAN; + win_fh->filehandle = CreateFileW(name_wide->data, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, dwCreationDisposition, f, NULL); |