summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/block/block_read.c
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2021-01-27 13:28:13 +1100
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-01-27 03:07:52 +0000
commit: 77d134c7a2bab846aaaa056b0883888a8219da2a (patch)
tree: 8caac27a3573139d221fdf93ba9c99b3e042bc78 /src/third_party/wiredtiger/src/block/block_read.c
parent: 5fad1f69662696c5b789392622aa34d370fb4825 (diff)
download: mongo-77d134c7a2bab846aaaa056b0883888a8219da2a.tar.gz
Import wiredtiger: a52cd5a47a7e9af9e2c341e66f0ffdd9bc977930 from branch mongodb-4.4
ref: ef1f2937c3..a52cd5a47a
for: 4.4.4

WT-6309 Add support for start/stop arguments to wt printlog command
WT-6866 Refactor python backup tests initial base class
WT-6924 Queue history store pages for urgent eviction when cache pressure is high
WT-6946 Adding test tags to an initial set of test programs
WT-7068 Add column store support to test_hs03
WT-7084 Fix assert in test code and a comment error
WT-7109 Retain no longer supported configuration options for backward compatibility
WT-7113 Integrate prototype tiered storage code into WT
WT-7114 Revert Makefile code to always run the prototype script
Diffstat (limited to 'src/third_party/wiredtiger/src/block/block_read.c')
-rw-r--r-- src/third_party/wiredtiger/src/block/block_read.c | 91
1 files changed, 71 insertions, 20 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c
index 8680f1f90f8..c5e3a1c193c 100644
--- a/src/third_party/wiredtiger/src/block/block_read.c
+++ b/src/third_party/wiredtiger/src/block/block_read.c
@@ -18,9 +18,10 @@ __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t
WT_BLOCK *block;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
+ WT_FH *fh;
WT_FILE_HANDLE *handle;
wt_off_t offset;
- uint32_t checksum, size;
+ uint32_t checksum, logid, size;
bool mapped;
block = bm->block;
@@ -28,9 +29,10 @@ __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t
WT_STAT_CONN_INCR(session, block_preload);
/* Crack the cookie. */
- WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &logid, &offset, &size, &checksum));
- handle = block->fh->handle;
+ WT_RET(__wt_block_fh(session, block, logid, &fh));
+ handle = fh->handle;
mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
if (mapped && handle->fh_map_preload != NULL)
ret = handle->fh_map_preload(
@@ -59,21 +61,23 @@ __wt_bm_read(
{
WT_BLOCK *block;
WT_DECL_RET;
+ WT_FH *fh;
WT_FILE_HANDLE *handle;
wt_off_t offset;
- uint32_t checksum, size;
+ uint32_t checksum, logid, size;
bool mapped;
WT_UNUSED(addr_size);
block = bm->block;
/* Crack the cookie. */
- WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &logid, &offset, &size, &checksum));
/*
* Map the block if it's possible.
*/
- handle = block->fh->handle;
+ WT_RET(__wt_block_fh(session, block, logid, &fh));
+ handle = fh->handle;
mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
if (mapped && handle->fh_map_preload != NULL) {
buf->data = (uint8_t *)bm->map + offset;
@@ -96,7 +100,7 @@ __wt_bm_read(
#endif
/* Read the block. */
__wt_capacity_throttle(session, size, WT_THROTTLE_READ);
- WT_RET(__wt_block_read_off(session, block, buf, offset, size, checksum));
+ WT_RET(__wt_block_read_off(session, block, buf, logid, offset, size, checksum));
/* Optionally discard blocks from the system's buffer cache. */
WT_RET(__wt_block_discard(session, block, (size_t)size));
@@ -109,17 +113,17 @@ __wt_bm_read(
* Dump a block into the log in 1KB chunks.
*/
static int
-__wt_bm_corrupt_dump(WT_SESSION_IMPL *session, WT_ITEM *buf, wt_off_t offset, uint32_t size,
- uint32_t checksum) WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_bm_corrupt_dump(WT_SESSION_IMPL *session, WT_ITEM *buf, uint32_t logid, wt_off_t offset,
+ uint32_t size, uint32_t checksum) WT_GCC_FUNC_ATTRIBUTE((cold))
{
WT_DECL_ITEM(tmp);
WT_DECL_RET;
size_t chunk, i, nchunks;
-#define WT_CORRUPT_FMT "{%" PRIuMAX ", %" PRIu32 ", %#" PRIx32 "}"
+#define WT_CORRUPT_FMT "{%" PRIu32 ": %" PRIuMAX ", %" PRIu32 ", %#" PRIx32 "}"
if (buf->size == 0) {
- __wt_errx(session, WT_CORRUPT_FMT ": empty buffer, no dump available", (uintmax_t)offset,
- size, checksum);
+ __wt_errx(session, WT_CORRUPT_FMT ": empty buffer, no dump available", logid,
+ (uintmax_t)offset, size, checksum);
return (0);
}
@@ -130,7 +134,7 @@ __wt_bm_corrupt_dump(WT_SESSION_IMPL *session, WT_ITEM *buf, wt_off_t offset, ui
WT_ERR(__wt_buf_catfmt(session, tmp, "%02x ", ((uint8_t *)buf->data)[i]));
if (++i == buf->size || i % 1024 == 0) {
__wt_errx(session,
- WT_CORRUPT_FMT ": (chunk %" WT_SIZET_FMT " of %" WT_SIZET_FMT "): %.*s",
+ WT_CORRUPT_FMT ": (chunk %" WT_SIZET_FMT " of %" WT_SIZET_FMT "): %.*s", logid,
(uintmax_t)offset, size, checksum, ++chunk, nchunks, (int)tmp->size,
(char *)tmp->data);
if (i == buf->size)
@@ -154,15 +158,15 @@ __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t
WT_DECL_ITEM(tmp);
WT_DECL_RET;
wt_off_t offset;
- uint32_t checksum, size;
+ uint32_t checksum, logid, size;
/* Read the block. */
WT_RET(__wt_scr_alloc(session, 0, &tmp));
WT_ERR(__wt_bm_read(bm, session, tmp, addr, addr_size));
/* Crack the cookie, dump the block. */
- WT_ERR(__wt_block_buffer_to_addr(bm->block, addr, &offset, &size, &checksum));
- WT_ERR(__wt_bm_corrupt_dump(session, tmp, offset, size, checksum));
+ WT_ERR(__wt_block_buffer_to_addr(bm->block, addr, &logid, &offset, &size, &checksum));
+ WT_ERR(__wt_bm_corrupt_dump(session, tmp, logid, offset, size, checksum));
err:
__wt_scr_free(session, &tmp);
@@ -203,14 +207,60 @@ err:
#endif
/*
+ * __wt_block_fh --
+ * Set *fhp to the handle for logid: block->fh, or a cached or newly opened block->lfh[logid] entry.
+ */
+int
+__wt_block_fh(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t logid, WT_FH **fhp)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ const char *filename;
+
+ if (!block->log_structured || logid == block->logid) {
+ *fhp = block->fh;
+ return (0);
+ }
+
+ /* TODO: fh readlock. Fast path: use the cached handle when the array covers logid and it's set. */
+ if (logid * sizeof(WT_FILE_HANDLE *) < block->lfh_alloc && (*fhp = block->lfh[logid]) != NULL)
+ return (0);
+
+ /* TODO: fh writelock */
+ /* Grow the handle array to hold slot logid, then recheck the slot before opening a file. */
+ WT_RET(__wt_realloc_def(session, &block->lfh_alloc, logid + 1, &block->lfh));
+ if (logid >= block->max_logid)
+ block->max_logid = logid + 1;
+ if ((*fhp = block->lfh[logid]) != NULL)
+ return (0);
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ if (logid == 0)
+ filename = block->name;
+ else {
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, logid));
+ filename = tmp->data;
+ }
+ WT_ERR(__wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA,
+ WT_FS_OPEN_READONLY | block->file_flags, &block->lfh[logid]));
+ *fhp = block->lfh[logid];
+ WT_ASSERT(session, *fhp != NULL);
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
* __wt_block_read_off --
* Read an addr/size pair referenced block into a buffer.
*/
int
-__wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset,
- uint32_t size, uint32_t checksum)
+__wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint32_t logid,
+ wt_off_t offset, uint32_t size, uint32_t checksum)
{
WT_BLOCK_HEADER *blk, swap;
+ WT_FH *fh;
size_t bufsize;
__wt_verbose(session, WT_VERB_READ, "off %" PRIuMAX ", size %" PRIu32 ", checksum %#" PRIx32,
@@ -243,7 +293,8 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_
block->name, size, block->allocsize);
WT_RET(__wt_buf_init(session, buf, bufsize));
- WT_RET(__wt_read(session, block->fh, offset, size, buf->mem));
+ WT_RET(__wt_block_fh(session, block, logid, &fh));
+ WT_RET(__wt_read(session, fh, offset, size, buf->mem));
buf->size = size;
/*
@@ -276,7 +327,7 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_
block->name, size, (uintmax_t)offset, swap.checksum, checksum);
if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
- WT_IGNORE_RET(__wt_bm_corrupt_dump(session, buf, offset, size, checksum));
+ WT_IGNORE_RET(__wt_bm_corrupt_dump(session, buf, logid, offset, size, checksum));
/* Panic if a checksum fails during an ordinary read. */
F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);