summaryrefslogtreecommitdiff
path: root/src/block/block_slvg.c
blob: 50a0d65fc83579ef1e6e102eea8456bea1cd31c9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2008-2011 WiredTiger, Inc.
 *	All rights reserved.
 */

#include "wt_internal.h"

/*
 * __wt_block_salvage_start --
 *	Prepare a file for salvage: trim any partial allocation unit from the
 * end of the file, re-initialize the description sector and position the
 * salvage cursor (block->slvg_off) just past that sector.
 *
 *	Returns 0 on success; failures from the truncate and description
 * initialization calls are handed to WT_RET.
 */
int
__wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
	WT_FH *fh;
	off_t body, keep;

	fh = block->fh;

	/*
	 * The file is laid out as one description sector followed by some
	 * number of allocation-size units.  Bytes that trail the last full
	 * unit must be garbage, by definition, so cut the file back to the
	 * description sector plus a whole number of units.
	 */
	if (fh->file_size > WT_BLOCK_DESC_SECTOR) {
		/* Bytes following the description sector... */
		body = fh->file_size - WT_BLOCK_DESC_SECTOR;
		/* ...rounded down to a multiple of the allocation size. */
		body = (body / block->allocsize) * block->allocsize;
		keep = body + WT_BLOCK_DESC_SECTOR;

		/* Only touch the file if there is something to trim. */
		if (keep != fh->file_size)
			WT_RET(__wt_ftruncate(session, fh, keep));
	}

	/* Write a fresh description sector. */
	WT_RET(__wt_desc_init(session, fh));

	/*
	 * Salvage scans from the first byte after the description record;
	 * the description record itself is never returned as a block.
	 */
	block->slvg_off = WT_BLOCK_DESC_SECTOR;

	/*
	 * Nothing is done with the freelist here: salvage operations don't
	 * read it.
	 */
	return (0);
}

/*
 * __wt_block_salvage_end --
 *	Finish a file salvage.  When success is zero, the block handle's
 * WT_BLOCK_OK flag is cleared and __wt_block_discard is called; when it is
 * non-zero nothing is done.  Always returns 0.
 */
int
__wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block, int success)
{
	/* A successful salvage requires no cleanup in this function. */
	if (success)
		return (0);

	/*
	 * The salvage failed: the free-list isn't useful, discard it, and
	 * clear WT_BLOCK_OK so an updated description block is not written
	 * back.
	 */
	F_CLR(block, WT_BLOCK_OK);
	__wt_block_discard(session, block);

	return (0);
}

/*
 * __wt_block_salvage_next --
 *	Return the next block from the file.
 *
 *	Starting at block->slvg_off, step through the file one allocation unit
 * at a time until a block is found whose header size is plausible and which
 * __wt_block_read accepts.  Units that don't start such a block are passed to
 * __wt_block_free and skipped.
 *
 *	On return of a block: buf holds whatever __wt_block_read left in it,
 * addr/addr_sizep hold the re-created address cookie, write_genp holds the
 * block's write generation, and block->slvg_off is advanced past the block.
 *	On end-of-file: *eofp is set to 1, 0 is returned and the other output
 * arguments are not written.
 *	Errors from buffer sizing, the header read, __wt_block_free and the
 * address-cookie packing are handed to WT_RET.
 */
int
__wt_block_salvage_next(
    WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BUF *buf,
    uint8_t *addr, uint32_t *addr_sizep, uint64_t *write_genp, int *eofp)
{
	WT_BLOCK_HEADER *blk;
	WT_FH *fh;
	off_t max, offset;
	uint64_t write_gen;
	uint32_t allocsize, cksum, size;
	uint8_t *endp;

	*eofp = 0;

	offset = block->slvg_off;
	fh = block->fh;
	allocsize = block->allocsize;
	WT_RET(__wt_buf_initsize(session, buf, allocsize));

	/* Read through the file, looking for pages with valid checksums. */
	for (max = fh->file_size;;) {
		if (offset >= max) {			/* Check eof. */
			*eofp = 1;
			return (0);
		}

		/*
		 * Read the start of a possible page (an allocation-size block),
		 * and get a page length from it.
		 */
		WT_RET(__wt_read(session, fh, offset, allocsize, buf->mem));
		blk = WT_BLOCK_HEADER_REF(buf->mem);

		/*
		 * Copy everything we need out of the header now.  The full
		 * block read below is given the same buffer, which was only
		 * sized for a single allocation unit: the read may have to
		 * grow it (and so move buf->mem) or replace its contents when
		 * decompressing, after which "blk" can't be trusted.
		 */
		size = blk->disk_size;
		cksum = blk->cksum;
		write_gen = blk->write_gen;

		/*
		 * The page can't be empty, must be a multiple of the
		 * allocation size, can't be more than the maximum page size,
		 * and can't run past the end of the file.  If the size isn't
		 * insane, read the entire page: reading the page validates
		 * the checksum and then decompresses the page as needed.  If
		 * reading the page fails, it's probably corruption, we ignore
		 * this block.
		 */
		if (size != 0 &&
		    size % allocsize == 0 &&
		    size <= WT_BTREE_PAGE_SIZE_MAX &&
		    offset + (off_t)size <= max &&
		    __wt_block_read(
		    session, block, buf, offset, size, cksum) == 0)
			break;		/* Valid block, return it. */

		WT_VERBOSE(session, salvage,
		    "skipping %" PRIu32 "B at file offset %" PRIuMAX,
		    allocsize, (uintmax_t)offset);

		/*
		 * Free the block and make sure we don't return it more than
		 * once.
		 */
		WT_RET(__wt_block_free(session, block, offset, allocsize));
		block->slvg_off = offset += allocsize;
	}

	/*
	 * Track the largest write-generation we've seen in the file so future
	 * writes, done after salvage completes, are preferred to these blocks.
	 * Use the value saved from the header before the full read, not the
	 * header pointer.
	 */
	*write_genp = write_gen;
	if (block->write_gen < write_gen)
		block->write_gen = write_gen;

	/* Re-create the address cookie that should reference this block. */
	endp = addr;
	WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
	*addr_sizep = WT_PTRDIFF32(endp, addr);

	/* We're successfully returning the page, move past it. */
	block->slvg_off = offset + size;

	return (0);
}