summaryrefslogtreecommitdiff
path: root/src/block/block_slvg.c
diff options
context:
space:
mode:
author Keith Bostic <keith@wiredtiger.com> 2012-03-25 17:49:32 +0000
committer Keith Bostic <keith@wiredtiger.com> 2012-03-25 17:52:09 +0000
commit b97813245af2ef6da2b761ee619a692f4d46f42b (patch)
tree 692b12cd00b817bd81e7ed56103912a847eaddcb /src/block/block_slvg.c
parent c3cecf7f3aa67978ba0ded16a4f97896093f7e0e (diff)
download mongo-b97813245af2ef6da2b761ee619a692f4d46f42b.tar.gz
Second set of snapshot changes, ref #167:
Start using the alloc and discard lists instead of running everything through the single avail list. This primarily affects what happens when a snapshot is deleted. A snapshot's avail list is discarded; alloc and discard lists are rolled forward into the live system's corresponding lists, and then the alloc and discard lists are checked for overlaps; the overlaps are the blocks that are newly available for re-use. Change extent writes to always append to the file (the problem is that we can't allocate the avail list blocks from the avail list, and it's simpler to always extend the file for extent lists rather than try to figure out which extent list writes are extending the file and which aren't). This implies that we can tell the allocation code to always extend the file, and the free code to simply put the freed blocks on the avail list, because extent blocks don't appear in the allocation list. Change page reconciliation to release all pages in the page-tracking information before writing a snapshot: the underlying system has to be fully consistent. We can no longer retain currently unused overflow pages for re-use across reconciliation calls: if a reconciliation call results in a snapshot, any overflow page not used as part of the page reconciliation must be discarded, otherwise the underlying snapshot won't be completely consistent. This adds a (very!) few cases where we don't re-use overflow items we would have re-used in earlier versions of the system, but it shouldn't be common. This required an extensive re-work of the block/overflow page tracking code. Changes for salvage: load and unload snapshots during salvage; blocks freed during salvage do not appear on the allocation list, so must be freed explicitly to the system's avail list. Rename block-manager page type WT_PAGE_FREELIST to WT_PAGE_BLOCK_MANAGER. 
We still handle block-manager pages in the btree layer during salvage, but we could also handle them in the salvage code if we're willing to look at the page type. Set the snapshot field names when cracking the buffer cookie. Don't display the by-size extent lists by default; they're not very useful.
Diffstat (limited to 'src/block/block_slvg.c')
-rw-r--r--src/block/block_slvg.c20
1 files changed, 13 insertions, 7 deletions
diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c
index 8b23547e7b6..baca1bb06a2 100644
--- a/src/block/block_slvg.c
+++ b/src/block/block_slvg.c
@@ -17,6 +17,9 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
off_t len;
uint32_t allocsize;
+ /* Reset the live snapshot information. */
+ WT_RET(__wt_block_snap_init(session, block, &block->live, 1));
+
/*
* Truncate the file to an initial sector plus N allocation size
* units (bytes trailing the last multiple of an allocation size
@@ -38,10 +41,11 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
block->slvg_off = WT_BLOCK_DESC_SECTOR;
/*
- * We don't currently need to do anything about the freelist because
- * we don't read it for salvage operations.
+ * We don't currently need to do anything about the snapshot extents
+ * because we don't read them for salvage operations.
*/
+ block->live_load = block->slvg = 1;
return (0);
}
@@ -50,11 +54,13 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
* End a file salvage.
*/
int
-__wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block, int success)
+__wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- /* If not successful, discard the live snapshot we've created. */
- if (!success)
- (void)__wt_block_snap_unload(session, block);
+ /* Discard the live snapshot. */
+ WT_RET(__wt_block_snap_unload(session, block));
+
+ block->slvg = 0;
+
return (0);
}
@@ -122,7 +128,7 @@ skip: WT_VERBOSE(session, salvage,
* than once.
*/
WT_RET(__wt_block_free(
- session, block, offset, (off_t)allocsize));
+ session, block, offset, (off_t)allocsize, 0));
block->slvg_off = offset += allocsize;
continue;
}