diff options
Diffstat (limited to 'src/block/block_read.c')
-rw-r--r-- | src/block/block_read.c | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/src/block/block_read.c b/src/block/block_read.c new file mode 100644 index 00000000000..76c56d266c2 --- /dev/null +++ b/src/block/block_read.c @@ -0,0 +1,162 @@ +/*- + * Copyright (c) 2008-2012 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_block_read_buf -- + * Read filesystem cookie referenced block into a buffer. + */ +int +__wt_block_read_buf(WT_SESSION_IMPL *session, WT_BLOCK *block, + WT_ITEM *buf, const uint8_t *addr, uint32_t addr_size) +{ + WT_ITEM *tmp; + off_t offset; + uint32_t size, cksum; + int ret; + + ret = 0; + + /* Crack the cookie. */ + WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); + + /* Read the block. */ + WT_RET(__wt_block_read(session, block, buf, offset, size, cksum)); + + /* Optionally verify the page. */ + if (block->fragbits == NULL) + return (0); + + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_block_addr_string(session, block, tmp, addr, addr_size)); + WT_ERR(__wt_verify_dsk( + session, (char *)tmp->data, buf->mem, buf->size)); + +err: __wt_scr_free(&tmp); + + return (ret); +} + +/* + * __wt_block_read -- + * Read an addr/size pair referenced block into a buffer. + */ +int +__wt_block_read(WT_SESSION_IMPL *session, WT_BLOCK *block, + WT_ITEM *buf, off_t offset, uint32_t size, uint32_t cksum) +{ + WT_BLOCK_HEADER *blk; + WT_ITEM *tmp; + WT_PAGE_HEADER *dsk; + size_t result_len; + uint32_t page_cksum; + int ret; + + tmp = NULL; + ret = 0; + + WT_VERBOSE(session, read, + "off %" PRIuMAX ", size %" PRIu32 ", cksum %" PRIu32, + (uintmax_t)offset, size, cksum); + + /* + * If we're compressing the file blocks, place the initial read into a + * scratch buffer, we're going to have to re-allocate more memory for + * decompression. Else check the caller's buffer size and grow it as + * necessary, there will only be one buffer. + */ + if (block->compressor == NULL) { + WT_RET(__wt_buf_init(session, buf, size)); + buf->size = size; + dsk = buf->mem; + } else { + WT_RET(__wt_scr_alloc(session, size, &tmp)); + tmp->size = size; + dsk = tmp->mem; + } + + /* Read. */ + WT_ERR(__wt_read(session, block->fh, offset, size, dsk)); + blk = WT_BLOCK_HEADER_REF(dsk); + + /* Validate the checksum. */ + if (block->checksum && + cksum != WT_BLOCK_CHECKSUM_NOT_SET && + blk->cksum != WT_BLOCK_CHECKSUM_NOT_SET) { + blk->cksum = 0; + page_cksum = __wt_cksum(dsk, size); + if (page_cksum == WT_BLOCK_CHECKSUM_NOT_SET) + ++page_cksum; + if (cksum != page_cksum) { + if (!F_ISSET(session, WT_SESSION_SALVAGE_QUIET_ERR)) + __wt_errx(session, + "read checksum error [%" + PRIu32 "B @ %" PRIuMAX ", %" + PRIu32 " != %" PRIu32 "]", + size, (uintmax_t)offset, cksum, page_cksum); + WT_ERR(WT_ERROR); + } + } + + /* + * If the in-memory block size is larger than the on-disk block size, + * the block is compressed. Size the user's buffer, copy the skipped + * bytes of the original image into place, then decompress. + * + * If the in-memory block size is less than or equal to the on-disk + * block size, the block is not compressed. + */ + if (blk->disk_size < dsk->size) { + if (block->compressor == NULL) + WT_ERR(__wt_illegal_value(session)); + + WT_RET(__wt_buf_init(session, buf, dsk->size)); + buf->size = dsk->size; + + /* + * Note the source length is NOT the number of compressed bytes, + * it's the length of the block we just read (minus the skipped + * bytes). We don't store the number of compressed bytes: some + * compression engines need that length stored externally, they + * don't have markers in the stream to signal the end of the + * compressed bytes. Those engines must store the compressed + * byte length somehow, see the snappy compression extension for + * an example. + */ + memcpy(buf->mem, tmp->mem, WT_BLOCK_COMPRESS_SKIP); + WT_ERR(block->compressor->decompress( + block->compressor, &session->iface, + (uint8_t *)tmp->mem + WT_BLOCK_COMPRESS_SKIP, + tmp->size - WT_BLOCK_COMPRESS_SKIP, + (uint8_t *)buf->mem + WT_BLOCK_COMPRESS_SKIP, + dsk->size - WT_BLOCK_COMPRESS_SKIP, + &result_len)); + if (result_len != dsk->size - WT_BLOCK_COMPRESS_SKIP) + WT_ERR(__wt_illegal_value(session)); + } else + if (block->compressor == NULL) + buf->size = dsk->size; + else + /* + * We guessed wrong: there was a compressor, but this + * block was not compressed, and now the page is in the + * wrong buffer and the buffer may be of the wrong size. + * This should be rare, why configure a compressor that + * doesn't work? Allocate a buffer of the right size + * (we used a scratch buffer which might be large), and + * copy the data into place. + */ + WT_ERR( + __wt_buf_set(session, buf, tmp->data, dsk->size)); + + WT_BSTAT_INCR(session, page_read); + WT_CSTAT_INCR(session, block_read); + +err: __wt_scr_free(&tmp); + return (ret); +} |