diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-01-27 13:28:13 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-01-27 03:07:52 +0000 |
commit | 77d134c7a2bab846aaaa056b0883888a8219da2a (patch) | |
tree | 8caac27a3573139d221fdf93ba9c99b3e042bc78 /src/third_party/wiredtiger/src/block/block_read.c | |
parent | 5fad1f69662696c5b789392622aa34d370fb4825 (diff) | |
download | mongo-77d134c7a2bab846aaaa056b0883888a8219da2a.tar.gz |
Import wiredtiger: a52cd5a47a7e9af9e2c341e66f0ffdd9bc977930 from branch mongodb-4.4
ref: ef1f2937c3..a52cd5a47a
for: 4.4.4
WT-6309 Add support for start/stop arguments to wt printlog command
WT-6866 Refactor python backup tests initial base class
WT-6924 Queue history store pages for urgent eviction when cache pressure is high
WT-6946 Adding test tags to an initial set of test programs
WT-7068 Add column store support to test_hs03
WT-7084 Fix assert in test code and a comment error
WT-7109 Retain no longer supported configuration options for backward compatibility
WT-7113 Integrate prototype tiered storage code into WT
WT-7114 Revert Makefile code to always run the prototype script
Diffstat (limited to 'src/third_party/wiredtiger/src/block/block_read.c')
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_read.c | 91 |
1 file changed, 71 insertions, 20 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 8680f1f90f8..c5e3a1c193c 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -18,9 +18,10 @@ __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t WT_BLOCK *block; WT_DECL_ITEM(tmp); WT_DECL_RET; + WT_FH *fh; WT_FILE_HANDLE *handle; wt_off_t offset; - uint32_t checksum, size; + uint32_t checksum, logid, size; bool mapped; block = bm->block; @@ -28,9 +29,10 @@ __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t WT_STAT_CONN_INCR(session, block_preload); /* Crack the cookie. */ - WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum)); + WT_RET(__wt_block_buffer_to_addr(block, addr, &logid, &offset, &size, &checksum)); - handle = block->fh->handle; + WT_RET(__wt_block_fh(session, block, logid, &fh)); + handle = fh->handle; mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; if (mapped && handle->fh_map_preload != NULL) ret = handle->fh_map_preload( @@ -59,21 +61,23 @@ __wt_bm_read( { WT_BLOCK *block; WT_DECL_RET; + WT_FH *fh; WT_FILE_HANDLE *handle; wt_off_t offset; - uint32_t checksum, size; + uint32_t checksum, logid, size; bool mapped; WT_UNUSED(addr_size); block = bm->block; /* Crack the cookie. */ - WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum)); + WT_RET(__wt_block_buffer_to_addr(block, addr, &logid, &offset, &size, &checksum)); /* * Map the block if it's possible. */ - handle = block->fh->handle; + WT_RET(__wt_block_fh(session, block, logid, &fh)); + handle = fh->handle; mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; if (mapped && handle->fh_map_preload != NULL) { buf->data = (uint8_t *)bm->map + offset; @@ -96,7 +100,7 @@ __wt_bm_read( #endif /* Read the block. 
*/ __wt_capacity_throttle(session, size, WT_THROTTLE_READ); - WT_RET(__wt_block_read_off(session, block, buf, offset, size, checksum)); + WT_RET(__wt_block_read_off(session, block, buf, logid, offset, size, checksum)); /* Optionally discard blocks from the system's buffer cache. */ WT_RET(__wt_block_discard(session, block, (size_t)size)); @@ -109,17 +113,17 @@ __wt_bm_read( * Dump a block into the log in 1KB chunks. */ static int -__wt_bm_corrupt_dump(WT_SESSION_IMPL *session, WT_ITEM *buf, wt_off_t offset, uint32_t size, - uint32_t checksum) WT_GCC_FUNC_ATTRIBUTE((cold)) +__wt_bm_corrupt_dump(WT_SESSION_IMPL *session, WT_ITEM *buf, uint32_t logid, wt_off_t offset, + uint32_t size, uint32_t checksum) WT_GCC_FUNC_ATTRIBUTE((cold)) { WT_DECL_ITEM(tmp); WT_DECL_RET; size_t chunk, i, nchunks; -#define WT_CORRUPT_FMT "{%" PRIuMAX ", %" PRIu32 ", %#" PRIx32 "}" +#define WT_CORRUPT_FMT "{%" PRIu32 ": %" PRIuMAX ", %" PRIu32 ", %#" PRIx32 "}" if (buf->size == 0) { - __wt_errx(session, WT_CORRUPT_FMT ": empty buffer, no dump available", (uintmax_t)offset, - size, checksum); + __wt_errx(session, WT_CORRUPT_FMT ": empty buffer, no dump available", logid, + (uintmax_t)offset, size, checksum); return (0); } @@ -130,7 +134,7 @@ __wt_bm_corrupt_dump(WT_SESSION_IMPL *session, WT_ITEM *buf, wt_off_t offset, ui WT_ERR(__wt_buf_catfmt(session, tmp, "%02x ", ((uint8_t *)buf->data)[i])); if (++i == buf->size || i % 1024 == 0) { __wt_errx(session, - WT_CORRUPT_FMT ": (chunk %" WT_SIZET_FMT " of %" WT_SIZET_FMT "): %.*s", + WT_CORRUPT_FMT ": (chunk %" WT_SIZET_FMT " of %" WT_SIZET_FMT "): %.*s", logid, (uintmax_t)offset, size, checksum, ++chunk, nchunks, (int)tmp->size, (char *)tmp->data); if (i == buf->size) @@ -154,15 +158,15 @@ __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t WT_DECL_ITEM(tmp); WT_DECL_RET; wt_off_t offset; - uint32_t checksum, size; + uint32_t checksum, logid, size; /* Read the block. 
*/ WT_RET(__wt_scr_alloc(session, 0, &tmp)); WT_ERR(__wt_bm_read(bm, session, tmp, addr, addr_size)); /* Crack the cookie, dump the block. */ - WT_ERR(__wt_block_buffer_to_addr(bm->block, addr, &offset, &size, &checksum)); - WT_ERR(__wt_bm_corrupt_dump(session, tmp, offset, size, checksum)); + WT_ERR(__wt_block_buffer_to_addr(bm->block, addr, &logid, &offset, &size, &checksum)); + WT_ERR(__wt_bm_corrupt_dump(session, tmp, logid, offset, size, checksum)); err: __wt_scr_free(session, &tmp); @@ -203,14 +207,60 @@ err: #endif /* + * __wt_block_fh -- + * Get a block file handle. + */ +int +__wt_block_fh(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t logid, WT_FH **fhp) +{ + WT_DECL_ITEM(tmp); + WT_DECL_RET; + const char *filename; + + if (!block->log_structured || logid == block->logid) { + *fhp = block->fh; + return (0); + } + + /* TODO: fh readlock */ + if (logid * sizeof(WT_FILE_HANDLE *) < block->lfh_alloc && (*fhp = block->lfh[logid]) != NULL) + return (0); + + /* TODO: fh writelock */ + /* Ensure the array goes far enough. */ + WT_RET(__wt_realloc_def(session, &block->lfh_alloc, logid + 1, &block->lfh)); + if (logid >= block->max_logid) + block->max_logid = logid + 1; + if ((*fhp = block->lfh[logid]) != NULL) + return (0); + + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + if (logid == 0) + filename = block->name; + else { + WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, logid)); + filename = tmp->data; + } + WT_ERR(__wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA, + WT_FS_OPEN_READONLY | block->file_flags, &block->lfh[logid])); + *fhp = block->lfh[logid]; + WT_ASSERT(session, *fhp != NULL); + +err: + __wt_scr_free(session, &tmp); + return (ret); +} + +/* * __wt_block_read_off -- * Read an addr/size pair referenced block into a buffer. 
*/ int -__wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, - uint32_t size, uint32_t checksum) +__wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint32_t logid, + wt_off_t offset, uint32_t size, uint32_t checksum) { WT_BLOCK_HEADER *blk, swap; + WT_FH *fh; size_t bufsize; __wt_verbose(session, WT_VERB_READ, "off %" PRIuMAX ", size %" PRIu32 ", checksum %#" PRIx32, @@ -243,7 +293,8 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_ block->name, size, block->allocsize); WT_RET(__wt_buf_init(session, buf, bufsize)); - WT_RET(__wt_read(session, block->fh, offset, size, buf->mem)); + WT_RET(__wt_block_fh(session, block, logid, &fh)); + WT_RET(__wt_read(session, fh, offset, size, buf->mem)); buf->size = size; /* @@ -276,7 +327,7 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_ block->name, size, (uintmax_t)offset, swap.checksum, checksum); if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) - WT_IGNORE_RET(__wt_bm_corrupt_dump(session, buf, offset, size, checksum)); + WT_IGNORE_RET(__wt_bm_corrupt_dump(session, buf, logid, offset, size, checksum)); /* Panic if a checksum fails during an ordinary read. */ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); |