diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/block')
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_map.c | 14 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_mgr.c | 129 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_open.c | 46 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_read.c | 23 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_slvg.c | 1 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_write.c | 23 |
6 files changed, 145 insertions, 91 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_map.c b/src/third_party/wiredtiger/src/block/block_map.c index 3d04a492269..b60623a37d8 100644 --- a/src/third_party/wiredtiger/src/block/block_map.c +++ b/src/third_party/wiredtiger/src/block/block_map.c @@ -20,6 +20,19 @@ __wt_block_map( *(void **)mapp = NULL; *maplenp = 0; +#ifdef WORDS_BIGENDIAN + /* + * The underlying objects are little-endian, mapping objects isn't + * currently supported on big-endian systems. + */ + WT_UNUSED(session); + WT_UNUSED(block); + WT_UNUSED(mappingcookie); +#else + /* Map support is configurable. */ + if (!S2C(session)->mmap) + return (0); + /* * Turn off mapping when verifying the file, because we can't perform * checksum validation of mapped segments, and verify has to checksum @@ -48,6 +61,7 @@ __wt_block_map( * Ignore errors, we'll read the file through the cache if map fails. */ (void)__wt_mmap(session, block->fh, mapp, maplenp, mappingcookie); +#endif return (0); } diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 6e2dc775362..dceaae8bb99 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -69,18 +69,6 @@ __bm_checkpoint(WT_BM *bm, } /* - * __bm_sync -- - * Flush a file to disk. - */ -static int -__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool async) -{ - return (async ? - __wt_fsync_async(session, bm->block->fh) : - __wt_fsync(session, bm->block->fh)); -} - -/* * __bm_checkpoint_load -- * Load a checkpoint. */ @@ -89,10 +77,6 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint) { - WT_CONNECTION_IMPL *conn; - - conn = S2C(session); - /* If not opening a checkpoint, we're opening the live system. */ bm->is_live = !checkpoint; WT_RET(__wt_block_checkpoint_load(session, bm->block, @@ -103,9 +87,8 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session, * Read-only objects are optionally mapped into memory instead * of being read into cache buffers. */ - if (conn->mmap) - WT_RET(__wt_block_map(session, bm->block, - &bm->map, &bm->maplen, &bm->mappingcookie)); + WT_RET(__wt_block_map(session, + bm->block, &bm->map, &bm->maplen, &bm->mappingcookie)); /* * If this handle is for a checkpoint, that is, read-only, there @@ -168,13 +151,13 @@ __bm_close(WT_BM *bm, WT_SESSION_IMPL *session) } /* - * __bm_compact_start -- - * Start a block manager compaction. + * __bm_compact_end -- + * End a block manager compaction. */ static int -__bm_compact_start(WT_BM *bm, WT_SESSION_IMPL *session) +__bm_compact_end(WT_BM *bm, WT_SESSION_IMPL *session) { - return (__wt_block_compact_start(session, bm->block)); + return (__wt_block_compact_end(session, bm->block)); } /* @@ -200,13 +183,13 @@ __bm_compact_skip(WT_BM *bm, WT_SESSION_IMPL *session, bool *skipp) } /* - * __bm_compact_end -- - * End a block manager compaction. + * __bm_compact_start -- + * Start a block manager compaction. */ static int -__bm_compact_end(WT_BM *bm, WT_SESSION_IMPL *session) +__bm_compact_start(WT_BM *bm, WT_SESSION_IMPL *session) { - return (__wt_block_compact_end(session, bm->block)); + return (__wt_block_compact_start(session, bm->block)); } /* @@ -233,36 +216,25 @@ __bm_is_mapped(WT_BM *bm, WT_SESSION_IMPL *session) } /* - * __bm_stat -- - * Block-manager statistics. - */ -static int -__bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats) -{ - __wt_block_stat(session, bm->block, stats); - return (0); -} - -/* - * __bm_write -- - * Write a buffer into a block, returning the block's address cookie. + * __bm_salvage_end -- + * End a block manager salvage. */ static int -__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, - WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum) +__bm_salvage_end(WT_BM *bm, WT_SESSION_IMPL *session) { - return (__wt_block_write( - session, bm->block, buf, addr, addr_sizep, data_cksum)); + return (__wt_block_salvage_end(session, bm->block)); } /* - * __bm_write_size -- - * Return the buffer size required to write a block. + * __bm_salvage_next -- + * Return the next block from the file. */ static int -__bm_write_size(WT_BM *bm, WT_SESSION_IMPL *session, size_t *sizep) +__bm_salvage_next(WT_BM *bm, + WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp) { - return (__wt_block_write_size(session, bm->block, sizep)); + return (__wt_block_salvage_next( + session, bm->block, addr, addr_sizep, eofp)); } /* @@ -288,25 +260,47 @@ __bm_salvage_valid(WT_BM *bm, } /* - * __bm_salvage_next -- - * Return the next block from the file. + * __bm_stat -- + * Block-manager statistics. */ static int -__bm_salvage_next(WT_BM *bm, - WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp) +__bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats) { - return (__wt_block_salvage_next( - session, bm->block, addr, addr_sizep, eofp)); + __wt_block_stat(session, bm->block, stats); + return (0); } /* - * __bm_salvage_end -- - * End a block manager salvage. + * __bm_sync -- + * Flush a file to disk. */ static int -__bm_salvage_end(WT_BM *bm, WT_SESSION_IMPL *session) +__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool async) { - return (__wt_block_salvage_end(session, bm->block)); + return (async ? + __wt_fsync_async(session, bm->block->fh) : + __wt_fsync(session, bm->block->fh)); +} + +/* + * __bm_verify_addr -- + * Verify an address. + */ +static int +__bm_verify_addr(WT_BM *bm, + WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) +{ + return (__wt_block_verify_addr(session, bm->block, addr, addr_size)); +} + +/* + * __bm_verify_end -- + * End a block manager verify. + */ +static int +__bm_verify_end(WT_BM *bm, WT_SESSION_IMPL *session) +{ + return (__wt_block_verify_end(session, bm->block)); } /* @@ -321,24 +315,25 @@ __bm_verify_start(WT_BM *bm, } /* - * __bm_verify_addr -- - * Verify an address. + * __bm_write -- + * Write a buffer into a block, returning the block's address cookie. */ static int -__bm_verify_addr(WT_BM *bm, - WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) +__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, + WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum) { - return (__wt_block_verify_addr(session, bm->block, addr, addr_size)); + return (__wt_block_write( + session, bm->block, buf, addr, addr_sizep, data_cksum)); } /* - * __bm_verify_end -- - * End a block manager verify. + * __bm_write_size -- + * Return the buffer size required to write a block. */ static int -__bm_verify_end(WT_BM *bm, WT_SESSION_IMPL *session) +__bm_write_size(WT_BM *bm, WT_SESSION_IMPL *session, size_t *sizep) { - return (__wt_block_verify_end(session, bm->block)); + return (__wt_block_write_size(session, bm->block, sizep)); } /* diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index dd0f3f0716a..d9b2f908737 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -296,15 +296,21 @@ __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) WT_RET(__wt_scr_alloc(session, allocsize, &buf)); memset(buf->mem, 0, allocsize); + /* + * Checksum a little-endian version of the header, and write everything + * in little-endian format. The checksum is (potentially) returned in a + * big-endian format, swap it into place in a separate step. + */ desc = buf->mem; desc->magic = WT_BLOCK_MAGIC; desc->majorv = WT_BLOCK_MAJOR_VERSION; desc->minorv = WT_BLOCK_MINOR_VERSION; - - /* Update the checksum. */ desc->cksum = 0; + __wt_block_desc_byteswap(desc); desc->cksum = __wt_cksum(desc, allocsize); - +#ifdef WORDS_BIGENDIAN + desc->cksum = __wt_bswap32(desc->cksum); +#endif ret = __wt_write(session, fh, (wt_off_t)0, (size_t)allocsize, desc); __wt_scr_free(session, &buf); @@ -321,7 +327,7 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_BLOCK_DESC *desc; WT_DECL_ITEM(buf); WT_DECL_RET; - uint32_t cksum; + uint32_t cksum_calculate, cksum_tmp; /* Use a scratch buffer to get correct alignment for direct I/O. */ WT_RET(__wt_scr_alloc(session, block->allocsize, &buf)); @@ -330,14 +336,19 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_ERR(__wt_read(session, block->fh, (wt_off_t)0, (size_t)block->allocsize, buf->mem)); + /* + * Handle little- and big-endian objects. Objects are written in little- + * endian format: save the header checksum, and calculate the checksum + * for the header in its little-endian form. Then, restore the header's + * checksum, and byte-swap the whole thing as necessary, leaving us with + * a calculated checksum that should match the checksum in the header. + */ desc = buf->mem; - WT_ERR(__wt_verbose(session, WT_VERB_BLOCK, - "%s: magic %" PRIu32 - ", major/minor: %" PRIu32 "/%" PRIu32 - ", checksum %#" PRIx32, - block->name, desc->magic, - desc->majorv, desc->minorv, - desc->cksum)); + cksum_tmp = desc->cksum; + desc->cksum = 0; + cksum_calculate = __wt_cksum(desc, block->allocsize); + desc->cksum = cksum_tmp; + __wt_block_desc_byteswap(desc); /* * We fail the open if the checksum fails, or the magic number is wrong @@ -348,10 +359,7 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block) * may have entered the wrong file name, and is now frantically pounding * their interrupt key. */ - cksum = desc->cksum; - desc->cksum = 0; - if (desc->magic != WT_BLOCK_MAGIC || - cksum != __wt_cksum(desc, block->allocsize)) + if (desc->magic != WT_BLOCK_MAGIC || desc->cksum != cksum_calculate) WT_ERR_MSG(session, WT_ERROR, "%s does not appear to be a WiredTiger file", block->name); @@ -365,6 +373,14 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_BLOCK_MAJOR_VERSION, WT_BLOCK_MINOR_VERSION, desc->majorv, desc->minorv); + WT_ERR(__wt_verbose(session, WT_VERB_BLOCK, + "%s: magic %" PRIu32 + ", major/minor: %" PRIu32 "/%" PRIu32 + ", checksum %#" PRIx32, + block->name, desc->magic, + desc->majorv, desc->minorv, + desc->cksum)); + err: __wt_scr_free(session, &buf); return (ret); } diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 0e5911ecf2a..6e74d7a7793 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -139,6 +139,7 @@ __wt_block_read_off_blind( WT_RET(__wt_read( session, block->fh, offset, (size_t)block->allocsize, buf->mem)); blk = WT_BLOCK_HEADER_REF(buf->mem); + __wt_block_header_byteswap(blk); /* * Copy out the size and checksum (we're about to re-use the buffer), @@ -163,7 +164,7 @@ int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t cksum) { - WT_BLOCK_HEADER *blk; + WT_BLOCK_HEADER *blk, swap; size_t bufsize; uint32_t page_cksum; @@ -193,14 +194,26 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_RET(__wt_read(session, block->fh, offset, size, buf->mem)); buf->size = size; + /* + * We incrementally read through the structure before doing a checksum, + * do little- to big-endian handling early on, and then select from the + * original or swapped structure as needed. + */ blk = WT_BLOCK_HEADER_REF(buf->mem); - if (blk->cksum == cksum) { + __wt_block_header_byteswap_copy(blk, &swap); + if (swap.cksum == cksum) { blk->cksum = 0; page_cksum = __wt_cksum(buf->mem, - F_ISSET(blk, WT_BLOCK_DATA_CKSUM) ? + F_ISSET(&swap, WT_BLOCK_DATA_CKSUM) ? size : WT_BLOCK_COMPRESS_SKIP); - if (page_cksum == cksum) + if (page_cksum == cksum) { + /* + * Swap the page-header as needed; this doesn't belong + * here, but it's the best place to catch all callers. + */ + __wt_page_header_byteswap(buf->mem); return (0); + } if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) __wt_errx(session, @@ -216,7 +229,7 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, "offset %" PRIuMAX ": block header checksum " "of %" PRIu32 " doesn't match expected checksum " "of %" PRIu32, - size, (uintmax_t)offset, blk->cksum, cksum); + size, (uintmax_t)offset, swap.cksum, cksum); /* Panic if a checksum fails during an ordinary read. */ return (block->verify || diff --git a/src/third_party/wiredtiger/src/block/block_slvg.c b/src/third_party/wiredtiger/src/block/block_slvg.c index ef22c727db4..a8cccd53023 100644 --- a/src/third_party/wiredtiger/src/block/block_slvg.c +++ b/src/third_party/wiredtiger/src/block/block_slvg.c @@ -126,6 +126,7 @@ __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_ERR(__wt_read( session, fh, offset, (size_t)allocsize, tmp->mem)); blk = WT_BLOCK_HEADER_REF(tmp->mem); + __wt_block_header_byteswap(blk); size = blk->disk_size; cksum = blk->cksum; diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c index 23f4d7650b9..4c6ac198fe4 100644 --- a/src/third_party/wiredtiger/src/block/block_write.c +++ b/src/third_party/wiredtiger/src/block/block_write.c @@ -203,11 +203,18 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_FH *fh; size_t align_size; wt_off_t offset; + uint32_t cksum; bool local_locked; blk = WT_BLOCK_HEADER_REF(buf->mem); fh = block->fh; + /* + * Swap the page-header as needed; this doesn't belong here, but it's + * the best place to catch all callers. + */ + __wt_page_header_byteswap(buf->mem); + /* Buffers should be aligned for writing. */ if (!F_ISSET(buf, WT_ITEM_ALIGNED)) { WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED)); @@ -255,13 +262,21 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, * because they're not compressed, both to give salvage a quick test * of whether a block is useful and to give us a test so we don't lose * the first WT_BLOCK_COMPRESS_SKIP bytes without noticing. + * + * Checksum a little-endian version of the header, and write everything + * in little-endian format. The checksum is (potentially) returned in a + * big-endian format, swap it into place in a separate step. */ blk->flags = 0; if (data_cksum) F_SET(blk, WT_BLOCK_DATA_CKSUM); blk->cksum = 0; - blk->cksum = __wt_cksum( + __wt_block_header_byteswap(blk); + blk->cksum = cksum = __wt_cksum( buf->mem, data_cksum ? align_size : WT_BLOCK_COMPRESS_SKIP); +#ifdef WORDS_BIGENDIAN + blk->cksum = __wt_bswap32(blk->cksum); +#endif /* Pre-allocate some number of extension structures. */ WT_RET(__wt_block_ext_prealloc(session, 5)); @@ -325,11 +340,11 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_RET(__wt_verbose(session, WT_VERB_WRITE, "off %" PRIuMAX ", size %" PRIuMAX ", cksum %" PRIu32, - (uintmax_t)offset, (uintmax_t)align_size, blk->cksum)); + (uintmax_t)offset, (uintmax_t)align_size, cksum)); *offsetp = offset; *sizep = WT_STORE_SIZE(align_size); - *cksump = blk->cksum; + *cksump = cksum; - return (ret); + return (0); } |