diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-11-29 15:38:52 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-29 05:09:44 +0000 |
commit | a0e76c7ab3729fc2d946dbab12ba946462961e42 (patch) | |
tree | 9973cd19d37d20e230123c77848805a017176826 /src/third_party | |
parent | d8440a2195b841c94388b57672e06fd30eeb05e0 (diff) | |
download | mongo-a0e76c7ab3729fc2d946dbab12ba946462961e42.tar.gz |
Import wiredtiger: 381b395c200afc62b54e6265032bb5662524031f from branch mongodb-master
ref: aacc797982..381b395c20
for: 5.2.0
WT-8451 Move the block manager's mmap support into the block cache code
Diffstat (limited to 'src/third_party')
-rw-r--r-- | src/third_party/wiredtiger/dist/filelist | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/dist/s_define.list | 1 | ||||
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_map.c | 77 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_read.c | 56 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block_cache/block_map.c | 127 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block_cache/block_mgr.c | 31 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_handle.c | 19 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/block.h | 1 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 13 |
10 files changed, 160 insertions, 169 deletions
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 9dae82777b8..2cda796ebdd 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -6,7 +6,6 @@ src/block/block_ckpt.c src/block/block_ckpt_scan.c src/block/block_compact.c src/block/block_ext.c -src/block/block_map.c src/block/block_open.c src/block/block_read.c src/block/block_session.c @@ -15,6 +14,7 @@ src/block/block_tiered.c src/block/block_vrfy.c src/block/block_write.c src/block_cache/block_cache.c +src/block_cache/block_map.c src/block_cache/block_mgr.c src/bloom/bloom.c src/btree/bt_compact.c diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list index 1cb6c2c4cac..14cd37f2e58 100644 --- a/src/third_party/wiredtiger/dist/s_define.list +++ b/src/third_party/wiredtiger/dist/s_define.list @@ -35,6 +35,7 @@ WT_DEBUG_BYTE WT_ERR_ASSERT WT_ERR_ERROR_OK WT_EXT_FOREACH_OFF +WT_FILE_HANDLE_WILLNEED WT_GEN_VERBOSE_CATEGORIES WT_GEN_VERBOSE_ENUM WT_GEN_VERBOSE_ENUM_STR diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 0433fdf51fb..4320dda592c 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-master", - "commit": "aacc7979823e9554fb6d46f75113aea584a0fde6" + "commit": "381b395c200afc62b54e6265032bb5662524031f" } diff --git a/src/third_party/wiredtiger/src/block/block_map.c b/src/third_party/wiredtiger/src/block/block_map.c deleted file mode 100644 index 9a95005dec5..00000000000 --- a/src/third_party/wiredtiger/src/block/block_map.c +++ /dev/null @@ -1,77 +0,0 @@ -/*- - * Copyright (c) 2014-present MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_block_map -- - * Map a segment of the file in, if possible. - */ -int -__wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, - void *mapped_cookiep) -{ - WT_DECL_RET; - WT_FILE_HANDLE *handle; - - *(void **)mapped_regionp = NULL; - *lengthp = 0; - *(void **)mapped_cookiep = NULL; - - /* Map support is configurable. */ - if (!S2C(session)->mmap) - return (0); - - /* - * Turn off mapping when verifying the file, because we can't perform checksum validation of - * mapped segments, and verify has to checksum pages. - */ - if (block->verify) - return (0); - - /* - * Turn off mapping if the application configured a cache size maximum, we can't control how - * much of the cache size we use in that case. - */ - if (block->os_cache_max != 0) - return (0); - - /* - * There may be no underlying functionality. - */ - handle = block->fh->handle; - if (handle->fh_map == NULL) - return (0); - - /* - * Map the file into memory. Ignore not-supported errors, we'll read the file through the cache - * if map fails. - */ - ret = handle->fh_map(handle, (WT_SESSION *)session, mapped_regionp, lengthp, mapped_cookiep); - if (ret == EBUSY || ret == ENOTSUP) { - *(void **)mapped_regionp = NULL; - ret = 0; - } - - return (ret); -} - -/* - * __wt_block_unmap -- - * Unmap any mapped-in segment of the file. - */ -int -__wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, - void *mapped_cookie) -{ - WT_FILE_HANDLE *handle; - - /* Unmap the file from memory. */ - handle = block->fh->handle; - return (handle->fh_unmap(handle, (WT_SESSION *)session, mapped_region, length, mapped_cookie)); -} diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 3f3fb1c4e76..97207244a82 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -9,41 +9,6 @@ #include "wt_internal.h" /* - * __wt_bm_preload -- - * Pre-load a page. - */ -int -__wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) -{ - WT_BLOCK *block; - WT_DECL_RET; - WT_FH *fh; - WT_FILE_HANDLE *handle; - wt_off_t offset; - uint32_t checksum, objectid, size; - bool mapped; - - block = bm->block; - - WT_STAT_CONN_INCR(session, block_preload); - - /* Crack the cookie. */ - WT_RET(__wt_block_addr_unpack( - session, block, addr, addr_size, &objectid, &offset, &size, &checksum)); - - WT_RET(__wt_block_fh(session, block, objectid, &fh)); - handle = fh->handle; - mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; - if (mapped && handle->fh_map_preload != NULL) - ret = handle->fh_map_preload( - handle, (WT_SESSION *)session, (uint8_t *)bm->map + offset, size, bm->mapped_cookie); - if (!mapped && handle->fh_advise != NULL) - ret = handle->fh_advise( - handle, (WT_SESSION *)session, offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED); - return (ret); -} - -/* * __wt_bm_read -- * Map or read address cookie referenced block into a buffer. */ @@ -52,12 +17,8 @@ __wt_bm_read( WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) { WT_BLOCK *block; - WT_DECL_RET; - WT_FH *fh; - WT_FILE_HANDLE *handle; wt_off_t offset; uint32_t checksum, objectid, size; - bool mapped; block = bm->block; @@ -65,23 +26,6 @@ __wt_bm_read( WT_RET(__wt_block_addr_unpack( session, block, addr, addr_size, &objectid, &offset, &size, &checksum)); - /* - * Map the block if it's possible. - */ - WT_RET(__wt_block_fh(session, block, objectid, &fh)); - handle = fh->handle; - mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; - if (mapped && handle->fh_map_preload != NULL) { - buf->data = (uint8_t *)bm->map + offset; - buf->size = size; - ret = handle->fh_map_preload( - handle, (WT_SESSION *)session, buf->data, buf->size, bm->mapped_cookie); - - WT_STAT_CONN_INCR(session, block_map_read); - WT_STAT_CONN_INCRV(session, block_byte_map_read, size); - return (ret); - } - #ifdef HAVE_DIAGNOSTIC /* * In diagnostic mode, verify the block we're about to read isn't on the available list, or for diff --git a/src/third_party/wiredtiger/src/block_cache/block_map.c b/src/third_party/wiredtiger/src/block_cache/block_map.c new file mode 100644 index 00000000000..23bcc585fd4 --- /dev/null +++ b/src/third_party/wiredtiger/src/block_cache/block_map.c @@ -0,0 +1,127 @@ +/*- + * Copyright (c) 2014-present MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_blkcache_map -- + * Map a segment of the file in, if possible. + */ +int +__wt_blkcache_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, + void *mapped_cookiep) +{ + WT_DECL_RET; + WT_FILE_HANDLE *handle; + + *(void **)mapped_regionp = NULL; + *lengthp = 0; + *(void **)mapped_cookiep = NULL; + + /* Map support is configurable. */ + if (!S2C(session)->mmap) + return (0); + + /* + * Turn off mapping when verifying the file, because we can't perform checksum validation of + * mapped segments, and verify has to checksum pages. + */ + if (block->verify) + return (0); + + /* + * Turn off mapping if the application configured a cache size maximum, we can't control how + * much of the cache size we use in that case. + */ + if (block->os_cache_max != 0) + return (0); + + /* + * There may be no underlying functionality. + */ + handle = block->fh->handle; + if (handle->fh_map == NULL) + return (0); + + /* + * Map the file into memory. Ignore not-supported errors, we'll read the file through the cache + * if map fails. + */ + ret = handle->fh_map(handle, (WT_SESSION *)session, mapped_regionp, lengthp, mapped_cookiep); + if (ret == EBUSY || ret == ENOTSUP) { + *(void **)mapped_regionp = NULL; + ret = 0; + } + + return (ret); +} + +/* + * __wt_blkcache_unmap -- + * Unmap any mapped-in segment of the file. + */ +int +__wt_blkcache_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, + void *mapped_cookie) +{ + WT_FILE_HANDLE *handle; + + /* Unmap the file from memory. */ + handle = block->fh->handle; + return (handle->fh_unmap(handle, (WT_SESSION *)session, mapped_region, length, mapped_cookie)); +} + +/* + * __wt_blkcache_map_read -- + * Map address cookie referenced block into a buffer. + */ +int +__wt_blkcache_map_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, + size_t addr_size, bool *foundp) +{ + WT_BLOCK *block; + WT_FH *fh; + WT_FILE_HANDLE *handle; + wt_off_t offset; + uint32_t checksum, objectid, size; + + *foundp = false; + + /* + * FIXME WT-7872: The WT_BLOCK.map test is wrong; tiered storage assumes object IDs translate to + * WT_FH structures, not WT_BLOCK structures. When we check if the WT_BLOCK handle references a + * mapped object, that's not going to work as we might be about to switch to a different WT_FH + * handle which may or may not reference a mapped object. + */ + if (!bm->map) + return (0); + + block = bm->block; + + /* Crack the cookie. */ + WT_RET(__wt_block_addr_unpack( + session, block, addr, addr_size, &objectid, &offset, &size, &checksum)); + + /* Map the block if it's possible. */ + WT_RET(__wt_block_fh(session, block, objectid, &fh)); + handle = fh->handle; + if (handle->fh_map_preload != NULL && offset + size <= (wt_off_t)bm->maplen && + handle->fh_map_preload( + handle, (WT_SESSION *)session, (uint8_t *)bm->map + offset, size, bm->mapped_cookie) == 0) { + if (buf != NULL) { + buf->data = (uint8_t *)bm->map + offset; + buf->size = size; + } + + *foundp = true; + WT_STAT_CONN_INCR(session, block_map_read); + WT_STAT_CONN_INCRV(session, block_byte_map_read, size); + } + + return (0); +} diff --git a/src/third_party/wiredtiger/src/block_cache/block_mgr.c b/src/third_party/wiredtiger/src/block_cache/block_mgr.c index f60905849d5..406ba4675d2 100644 --- a/src/third_party/wiredtiger/src/block_cache/block_mgr.c +++ b/src/third_party/wiredtiger/src/block_cache/block_mgr.c @@ -108,7 +108,7 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, s * Read-only objects are optionally mapped into memory instead of being read into cache * buffers. */ - WT_RET(__wt_block_map(session, bm->block, &bm->map, &bm->maplen, &bm->mapped_cookie)); + WT_RET(__wt_blkcache_map(session, bm->block, &bm->map, &bm->maplen, &bm->mapped_cookie)); /* * If this handle is for a checkpoint, that is, read-only, there isn't a lot you can do with @@ -174,7 +174,7 @@ __bm_checkpoint_unload(WT_BM *bm, WT_SESSION_IMPL *session) /* Unmap any mapped segment. */ if (bm->map != NULL) - WT_TRET(__wt_block_unmap(session, bm->block, bm->map, bm->maplen, &bm->mapped_cookie)); + WT_TRET(__wt_blkcache_unmap(session, bm->block, bm->map, bm->maplen, &bm->mapped_cookie)); /* Unload the checkpoint. */ WT_TRET(__wt_block_checkpoint_unload(session, bm->block, !bm->is_live)); @@ -392,6 +392,11 @@ __bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr blkcache = &S2C(session)->blkcache; + /* Check for mapped blocks. */ + WT_RET(__wt_blkcache_map_read(bm, session, buf, addr, addr_size, &found)); + if (found) + return (0); + /* Check the block cache. */ skip_cache = true; if (blkcache->type != BLKCACHE_UNCONFIGURED) { @@ -410,27 +415,6 @@ __bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr } /* - * __bm_preload -- - * Pre-load a page. - */ -static int -__bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) -{ - WT_DECL_ITEM(tmp); - WT_DECL_RET; - - /* Ignore underlying preload errors, just use them as an indication preload didn't work. */ - if (__wt_bm_preload(bm, session, addr, addr_size) == 0) - return (0); - - /* Do it the slow way. */ - WT_RET(__wt_scr_alloc(session, 0, &tmp)); - ret = __bm_read(bm, session, tmp, addr, addr_size); - __wt_scr_free(session, &tmp); - return (ret); -} - -/* * __bm_salvage_end -- * End a block manager salvage. */ @@ -696,7 +680,6 @@ __bm_method_set(WT_BM *bm, bool readonly) bm->free = __bm_free; bm->is_mapped = __bm_is_mapped; bm->map_discard = __bm_map_discard; - bm->preload = __bm_preload; bm->read = __bm_read; bm->salvage_end = __bm_salvage_end; bm->salvage_next = __bm_salvage_next; diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index cd5cb50cc3f..61f8055f869 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -797,17 +797,30 @@ __btree_preload(WT_SESSION_IMPL *session) WT_ADDR_COPY addr; WT_BM *bm; WT_BTREE *btree; + WT_DECL_ITEM(tmp); + WT_DECL_RET; WT_REF *ref; + uint64_t block_preload; btree = S2BT(session); bm = btree->bm; + block_preload = 0; + + WT_RET(__wt_scr_alloc(session, 0, &tmp)); /* Pre-load the second-level internal pages. */ WT_INTL_FOREACH_BEGIN (session, btree->root.page, ref) - if (__wt_ref_addr_copy(session, ref, &addr)) - WT_RET(bm->preload(bm, session, addr.addr, addr.size)); + if (__wt_ref_addr_copy(session, ref, &addr)) { + WT_ERR(bm->read(bm, session, tmp, addr.addr, addr.size)); + ++block_preload; + } WT_INTL_FOREACH_END; - return (0); + +err: + __wt_scr_free(session, &tmp); + + WT_STAT_CONN_INCRV(session, block_preload, block_preload); + return (ret); } /* diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index a2f6753884f..9dac14a7a05 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -190,7 +190,6 @@ struct __wt_bm { int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t); - int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); int (*read)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); int (*salvage_end)(WT_BM *, WT_SESSION_IMPL *); int (*salvage_next)(WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t *, bool *); diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 3f4b06363d4..6a5c9b68127 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -95,9 +95,16 @@ extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) extern int __wt_blkcache_get(WT_SESSION_IMPL *session, WT_ITEM *data, const uint8_t *addr, size_t addr_size, bool *foundp, bool *skip_cachep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_blkcache_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, + size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_blkcache_map_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, + const uint8_t *addr, size_t addr_size, bool *foundp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_blkcache_put(WT_SESSION_IMPL *session, WT_ITEM *data, const uint8_t *addr, size_t addr_size, bool checkpoint_io, bool write) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_blkcache_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, + size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_addr_pack(WT_BLOCK *block, uint8_t **pp, uint32_t objectid, wt_off_t offset, @@ -189,8 +196,6 @@ extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filenam uint32_t allocsize, WT_BM **bmp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, - size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list, wt_off_t offset, uint32_t size, bool live, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -220,8 +225,6 @@ extern int __wt_block_switch_object(WT_SESSION_IMPL *session, WT_BLOCK *block, u uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, - size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block) @@ -259,8 +262,6 @@ extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t f WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, - size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, |