summary | refs | log | tree | commit | diff
path: root/src/block
diff options
context:
space:
mode:
author: Keith Bostic <keith@wiredtiger.com> 2012-11-23 13:41:20 +0000
committer: Keith Bostic <keith@wiredtiger.com> 2012-11-23 13:41:20 +0000
commit: 7acb1bf32940fee804346b61200e459c38b784bb (patch)
tree: 501d48dda605b4efa4f3dd47fd03d0305ecdd2a2 /src/block
parent: b25797a83046f7684178b8ea1b0080f989f265a3 (diff)
download: mongo-7acb1bf32940fee804346b61200e459c38b784bb.tar.gz
We need to checksum the data if it's not compressed, even if checksums
have been turned off. Drill holes down from the btree write function to the underlying block write function so it knows if the data requires a checksum. Add information to the block header structure so we know if the checksum applies to the leading 64B or the entire data segment.
Diffstat (limited to 'src/block')
-rw-r--r-- src/block/block_ckpt.c 7
-rw-r--r-- src/block/block_ext.c 2
-rw-r--r-- src/block/block_mgr.c 13
-rw-r--r-- src/block/block_open.c 4
-rw-r--r-- src/block/block_read.c 4
-rw-r--r-- src/block/block_slvg.c 9
-rw-r--r-- src/block/block_write.c 34
7 files changed, 39 insertions, 34 deletions
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
index dec1c4bde0d..5b2c273d21e 100644
--- a/src/block/block_ckpt.c
+++ b/src/block/block_ckpt.c
@@ -186,8 +186,8 @@ __wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci)
* Create a new checkpoint.
*/
int
-__wt_block_checkpoint(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase)
+__wt_block_checkpoint(WT_SESSION_IMPL *session,
+ WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase, int data_cksum)
{
WT_BLOCK_CKPT *ci;
@@ -208,7 +208,8 @@ __wt_block_checkpoint(
ci->root_size = ci->root_cksum = 0;
} else
WT_RET(__wt_block_write_off(session, block, buf,
- &ci->root_offset, &ci->root_size, &ci->root_cksum, 0));
+ &ci->root_offset, &ci->root_size, &ci->root_cksum,
+ data_cksum, 0));
/* Process the checkpoint list, deleting and updating as required. */
WT_RET(__ckpt_process(session, block, ckptbase));
diff --git a/src/block/block_ext.c b/src/block/block_ext.c
index f799c6f6c2b..16bab8e3fb8 100644
--- a/src/block/block_ext.c
+++ b/src/block/block_ext.c
@@ -1101,7 +1101,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session,
/* Write the extent list to disk. */
WT_ERR(__wt_block_write_off(
- session, block, tmp, &el->offset, &el->size, &el->cksum, 1));
+ session, block, tmp, &el->offset, &el->size, &el->cksum, 1, 1));
WT_VERBOSE_ERR(session, block,
"%s written %" PRIdMAX "/%" PRIu32,
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 92f6535c4e1..599abb9c38d 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -136,14 +136,16 @@ __wt_bm_close(WT_SESSION_IMPL *session)
* Write a buffer into a block, creating a checkpoint.
*/
int
-__wt_bm_checkpoint(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckptbase)
+__wt_bm_checkpoint(
+ WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckptbase, int data_cksum)
{
WT_BLOCK *block;
if ((block = session->btree->block) == NULL)
return (__bm_invalid(session));
- return (__wt_block_checkpoint(session, block, buf, ckptbase));
+ return (
+ __wt_block_checkpoint(session, block, buf, ckptbase, data_cksum));
}
/*
@@ -287,15 +289,16 @@ __wt_bm_write_size(WT_SESSION_IMPL *session, uint32_t *sizep)
* Write a buffer into a block, returning the block's address cookie.
*/
int
-__wt_bm_write(
- WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, uint32_t *addr_size)
+__wt_bm_write(WT_SESSION_IMPL *session,
+ WT_ITEM *buf, uint8_t *addr, uint32_t *addr_size, int data_cksum)
{
WT_BLOCK *block;
if ((block = session->btree->block) == NULL)
return (__bm_invalid(session));
- return (__wt_block_write(session, block, buf, addr, addr_size));
+ return (
+ __wt_block_write(session, block, buf, addr, addr_size, data_cksum));
}
/*
diff --git a/src/block/block_open.c b/src/block/block_open.c
index d0704d48624..361e1d942e9 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -86,10 +86,6 @@ __wt_block_open(WT_SESSION_IMPL *session, const char *filename,
WT_ERR(__wt_config_getones(session, config, "allocation_size", &cval));
block->allocsize = (uint32_t)cval.val;
- /* Check if configured for checksums. */
- WT_ERR(__wt_config_getones(session, config, "checksum", &cval));
- block->checksum = cval.val == 0 ? 0 : 1;
-
/* Open the underlying file handle. */
WT_ERR(__wt_open(session, filename, 0, 0, 1, &block->fh));
diff --git a/src/block/block_read.c b/src/block/block_read.c
index 9c5288c7ba8..6bd34fcee9e 100644
--- a/src/block/block_read.c
+++ b/src/block/block_read.c
@@ -79,8 +79,8 @@ __wt_block_read_off(WT_SESSION_IMPL *session,
/* Validate the checksum. */
blk = WT_BLOCK_HEADER_REF(buf->mem);
blk->cksum = 0;
- page_cksum = __wt_cksum(
- buf->mem, block->checksum ? size : WT_BLOCK_COMPRESS_SKIP);
+ page_cksum = __wt_cksum(buf->mem,
+ F_ISSET(blk, WT_BLOCK_DATA_CKSUM) ? size : WT_BLOCK_COMPRESS_SKIP);
if (cksum != page_cksum) {
if (!F_ISSET(session, WT_SESSION_SALVAGE_QUIET_ERR))
__wt_errx(session,
diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c
index 86c78bf2c2a..56a37806547 100644
--- a/src/block/block_slvg.c
+++ b/src/block/block_slvg.c
@@ -122,11 +122,10 @@ __wt_block_salvage_next(
goto skip;
/*
- * The page size isn't insane, read the entire page: reading the
- * page validates the checksum, if checksums are configured. If
- * reading the page fails, it's probably corruption, ignore the
- * block. If reading the page succeeds, return its address as a
- * possible block.
+ * The block size isn't insane, read the entire block. Reading
+ * the block validates the checksum; if reading the block fails,
+ * ignore it. If reading the block succeeds, return its address
+ * as a possible page.
*/
if (__wt_block_read_off(
session, block, tmp, offset, size, cksum) == 0)
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 2430cd37a32..b42a92a42bc 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -38,8 +38,8 @@ __wt_block_write_size(
* Write a buffer into a block, returning the block's address cookie.
*/
int
-__wt_block_write(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, uint32_t *addr_size)
+__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_ITEM *buf, uint8_t *addr, uint32_t *addr_size, int data_cksum)
{
off_t offset;
uint32_t size, cksum;
@@ -48,7 +48,7 @@ __wt_block_write(WT_SESSION_IMPL *session,
WT_UNUSED(addr_size);
WT_RET(__wt_block_write_off(
- session, block, buf, &offset, &size, &cksum, 0));
+ session, block, buf, &offset, &size, &cksum, data_cksum, 0));
endp = addr;
WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
@@ -64,7 +64,8 @@ __wt_block_write(WT_SESSION_IMPL *session,
*/
int
__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_ITEM *buf, off_t *offsetp, uint32_t *sizep, uint32_t *cksump, int locked)
+ WT_ITEM *buf, off_t *offsetp, uint32_t *sizep, uint32_t *cksump,
+ int data_cksum, int locked)
{
WT_BLOCK_HEADER *blk;
WT_DECL_RET;
@@ -112,19 +113,24 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
blk->disk_size = align_size;
/*
- * Update the block's checksum: if checksums are configured, checksum
- * the complete data, otherwise checksum just the not-compressed bytes.
- *
- * The assumption is applications with strong compression support turn
- * off checksums and assume corrupted blocks won't decompress correctly.
- * However, the first WT_BLOCK_COMPRESS_SKIP bytes are not compressed,
- * so we checksum them, both to give salvage a quick test of whether a
- * block is useful, and to hopefully give us a test so we don't lose the
- * first WT_BLOCK_COMPRESS_SKIP bytes without noticing.
+ * Update the block's checksum: if our caller specifies, checksum the
+ * complete data, otherwise checksum the leading WT_BLOCK_COMPRESS_SKIP
+ * bytes. The assumption is applications with good compression support
+ * turn off checksums and assume corrupted blocks won't decompress
+ * correctly. However, if compression failed to shrink the block, the
+ * block wasn't compressed, in which case our caller will tell us to
+ * checksum the data to detect corruption. If compression succeeded,
+ * we still need to checksum the first WT_BLOCK_COMPRESS_SKIP bytes
+ * because they're not compressed, both to give salvage a quick test
+ * of whether a block is useful and to give us a test so we don't lose
+ * the first WT_BLOCK_COMPRESS_SKIP bytes without noticing.
*/
+ blk->flags = 0;
+ if (data_cksum)
+ F_SET(blk, WT_BLOCK_DATA_CKSUM);
blk->cksum = 0;
blk->cksum = __wt_cksum(
- buf->mem, block->checksum ? align_size : WT_BLOCK_COMPRESS_SKIP);
+ buf->mem, data_cksum ? align_size : WT_BLOCK_COMPRESS_SKIP);
if (!locked)
__wt_spin_lock(session, &block->live_lock);