diff options
author | Luke Chen <luke.chen@mongodb.com> | 2019-08-21 05:23:37 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-08-21 05:23:37 +0000 |
commit | ac41c65f6355f83aac70136324c98561ac79daa1 (patch) | |
tree | a7c3f7ef090b59c6a06838a02c96bd1d49e1c729 /src/third_party/wiredtiger/src/block/block_ckpt_scan.c | |
parent | f54709196711c63a429b71f47c584661286d675f (diff) | |
download | mongo-ac41c65f6355f83aac70136324c98561ac79daa1.tar.gz |
Import wiredtiger: 7dfd9391862bc9a6d84868c4dc51689c45a3aacf from branch mongodb-4.4
ref: c809757d8b..7dfd939186
for: 4.3.1
WT-4658 Apply Clang Format
WT-4810 Adding WT_ERR_ASSERT and WT_RET_ASSERT macros
WT-5046 Prepared transactions aren't properly cleared from global table with WT_CONN_LOG_DEBUG_MODE enabled
Diffstat (limited to 'src/third_party/wiredtiger/src/block/block_ckpt_scan.c')
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_ckpt_scan.c | 664 |
1 files changed, 322 insertions, 342 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt_scan.c b/src/third_party/wiredtiger/src/block/block_ckpt_scan.c index 91c82d122f9..b7fda0d73b2 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt_scan.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt_scan.c @@ -47,373 +47,353 @@ /* * __wt_block_checkpoint_final -- - * Append metadata and checkpoint information to a buffer. + * Append metadata and checkpoint information to a buffer. */ int -__wt_block_checkpoint_final(WT_SESSION_IMPL *session, - WT_BLOCK *block, WT_ITEM *buf, uint8_t **file_sizep) +__wt_block_checkpoint_final( + WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t **file_sizep) { - WT_CKPT *ckpt; - size_t align_size, file_size_offset, len, size; - uint8_t *p; - - *file_sizep = 0; - - ckpt = block->final_ckpt; - p = (uint8_t *)buf->mem + buf->size; - - /* - * First, add in a counter to uniquely order checkpoints at our level. - * There's order and time information in the checkpoint itself, but the - * order isn't written and the time is only at second granularity. - * I'm using the Btree write generation for this purpose. That's - * safe and guaranteed correct because everything is locked down for the - * checkpoint, we're the only writer. Plus, because we use the write - * generation as a database connection generation, it's guaranteed to - * move forward and never repeat. - * It's a layering violation though, this is the only place the - * block manager uses the write generation. The alternative would be to - * add our own write-generation scheme in the block manager, storing a - * value and recovering it when we open the file. We could do that, as - * reading the final avail list when a file is opened is unavoidable, - * so we can retrieve the value written here when we open the file, but - * this approach is simpler. 
- */ - size = buf->size + WT_INTPACK64_MAXSIZE; - WT_RET(__wt_buf_extend(session, buf, size)); - p = (uint8_t *)buf->mem + buf->size; - WT_RET(__wt_vpack_uint(&p, 0, ++S2BT(session)->write_gen)); - buf->size = WT_PTRDIFF(p, buf->mem); - - /* - * Second, add space for the final file size as a packed value. We don't - * know how large it will be so skip the maximum required space. - */ - size = buf->size + WT_INTPACK64_MAXSIZE; - WT_RET(__wt_buf_extend(session, buf, size)); - p = (uint8_t *)buf->mem + buf->size; - memset(p, 0, WT_INTPACK64_MAXSIZE); - file_size_offset = buf->size; - buf->size = size; - - /* 3a, copy the metadata length into the buffer. */ - len = strlen(ckpt->block_metadata); - size = buf->size + WT_INTPACK64_MAXSIZE; - WT_RET(__wt_buf_extend(session, buf, size)); - p = (uint8_t *)buf->mem + buf->size; - WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len)); - buf->size = WT_PTRDIFF(p, buf->mem); - - /* 3b, copy the metadata into the buffer. */ - size = buf->size + len; - WT_RET(__wt_buf_extend(session, buf, size)); - p = (uint8_t *)buf->mem + buf->size; - memcpy(p, ckpt->block_metadata, len); - buf->size = size; - - /* 4a, copy the checkpoint list length into the buffer. */ - len = strlen(ckpt->block_checkpoint); - size = buf->size + WT_INTPACK64_MAXSIZE; - WT_RET(__wt_buf_extend(session, buf, size)); - p = (uint8_t *)buf->mem + buf->size; - WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len)); - buf->size = WT_PTRDIFF(p, buf->mem); - - /* 4b, copy the checkpoint list into the buffer. */ - size = buf->size + len; - WT_RET(__wt_buf_extend(session, buf, size)); - p = (uint8_t *)buf->mem + buf->size; - memcpy(p, ckpt->block_checkpoint, len); - buf->size = size; - - /* - * 5a, copy the not-quite-right checkpoint information length into the - * buffer. 
- */ - len = ckpt->raw.size; - size = buf->size + WT_INTPACK64_MAXSIZE; - WT_RET(__wt_buf_extend(session, buf, size)); - p = (uint8_t *)buf->mem + buf->size; - WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len)); - buf->size = WT_PTRDIFF(p, buf->mem); - - /* - * 5b, copy the not-quite-right checkpoint information into the buffer. - */ - size = buf->size + len; - WT_RET(__wt_buf_extend(session, buf, size)); - p = (uint8_t *)buf->mem + buf->size; - memcpy(p, ckpt->raw.data, len); - buf->size = size; - - /* - * We might have grown the buffer beyond the original allocation size, - * make sure that we're still in compliance. - */ - align_size = WT_ALIGN(buf->size, block->allocsize); - if (align_size > buf->memsize) - WT_RET(__wt_buf_extend(session, buf, align_size)); - - *file_sizep = (uint8_t *)buf->mem + file_size_offset; - - return (0); + WT_CKPT *ckpt; + size_t align_size, file_size_offset, len, size; + uint8_t *p; + + *file_sizep = 0; + + ckpt = block->final_ckpt; + p = (uint8_t *)buf->mem + buf->size; + + /* + * First, add in a counter to uniquely order checkpoints at our level. + * There's order and time information in the checkpoint itself, but the + * order isn't written and the time is only at second granularity. + * I'm using the Btree write generation for this purpose. That's + * safe and guaranteed correct because everything is locked down for the + * checkpoint, we're the only writer. Plus, because we use the write + * generation as a database connection generation, it's guaranteed to + * move forward and never repeat. + * It's a layering violation though, this is the only place the + * block manager uses the write generation. The alternative would be to + * add our own write-generation scheme in the block manager, storing a + * value and recovering it when we open the file. 
We could do that, as + * reading the final avail list when a file is opened is unavoidable, + * so we can retrieve the value written here when we open the file, but + * this approach is simpler. + */ + size = buf->size + WT_INTPACK64_MAXSIZE; + WT_RET(__wt_buf_extend(session, buf, size)); + p = (uint8_t *)buf->mem + buf->size; + WT_RET(__wt_vpack_uint(&p, 0, ++S2BT(session)->write_gen)); + buf->size = WT_PTRDIFF(p, buf->mem); + + /* + * Second, add space for the final file size as a packed value. We don't know how large it will + * be so skip the maximum required space. + */ + size = buf->size + WT_INTPACK64_MAXSIZE; + WT_RET(__wt_buf_extend(session, buf, size)); + p = (uint8_t *)buf->mem + buf->size; + memset(p, 0, WT_INTPACK64_MAXSIZE); + file_size_offset = buf->size; + buf->size = size; + + /* 3a, copy the metadata length into the buffer. */ + len = strlen(ckpt->block_metadata); + size = buf->size + WT_INTPACK64_MAXSIZE; + WT_RET(__wt_buf_extend(session, buf, size)); + p = (uint8_t *)buf->mem + buf->size; + WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len)); + buf->size = WT_PTRDIFF(p, buf->mem); + + /* 3b, copy the metadata into the buffer. */ + size = buf->size + len; + WT_RET(__wt_buf_extend(session, buf, size)); + p = (uint8_t *)buf->mem + buf->size; + memcpy(p, ckpt->block_metadata, len); + buf->size = size; + + /* 4a, copy the checkpoint list length into the buffer. */ + len = strlen(ckpt->block_checkpoint); + size = buf->size + WT_INTPACK64_MAXSIZE; + WT_RET(__wt_buf_extend(session, buf, size)); + p = (uint8_t *)buf->mem + buf->size; + WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len)); + buf->size = WT_PTRDIFF(p, buf->mem); + + /* 4b, copy the checkpoint list into the buffer. */ + size = buf->size + len; + WT_RET(__wt_buf_extend(session, buf, size)); + p = (uint8_t *)buf->mem + buf->size; + memcpy(p, ckpt->block_checkpoint, len); + buf->size = size; + + /* + * 5a, copy the not-quite-right checkpoint information length into the + * buffer. 
+ */ + len = ckpt->raw.size; + size = buf->size + WT_INTPACK64_MAXSIZE; + WT_RET(__wt_buf_extend(session, buf, size)); + p = (uint8_t *)buf->mem + buf->size; + WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len)); + buf->size = WT_PTRDIFF(p, buf->mem); + + /* + * 5b, copy the not-quite-right checkpoint information into the buffer. + */ + size = buf->size + len; + WT_RET(__wt_buf_extend(session, buf, size)); + p = (uint8_t *)buf->mem + buf->size; + memcpy(p, ckpt->raw.data, len); + buf->size = size; + + /* + * We might have grown the buffer beyond the original allocation size, make sure that we're + * still in compliance. + */ + align_size = WT_ALIGN(buf->size, block->allocsize); + if (align_size > buf->memsize) + WT_RET(__wt_buf_extend(session, buf, align_size)); + + *file_sizep = (uint8_t *)buf->mem + file_size_offset; + + return (0); } struct saved_block_info { - uint64_t write_gen; - wt_off_t offset; - uint32_t size; - uint32_t checksum; - uint64_t file_size; + uint64_t write_gen; + wt_off_t offset; + uint32_t size; + uint32_t checksum; + uint64_t file_size; - char *metadata; - char *checkpoint_list; + char *metadata; + char *checkpoint_list; - WT_ITEM *checkpoint; + WT_ITEM *checkpoint; }; /* * __block_checkpoint_update -- - * Update the checkpoint information for the file. + * Update the checkpoint information for the file. */ static int -__block_checkpoint_update( - WT_SESSION_IMPL *session, WT_BLOCK *block, struct saved_block_info *info) +__block_checkpoint_update(WT_SESSION_IMPL *session, WT_BLOCK *block, struct saved_block_info *info) { - WT_BLOCK_CKPT ci; - WT_ITEM *checkpoint; - uint8_t *endp; - - memset(&ci, 0, sizeof(ci)); - checkpoint = info->checkpoint; - - if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) - __wt_ckpt_verbose( - session, block, "import original", NULL, checkpoint->mem); - - /* - * Convert the final checkpoint data blob to a WT_BLOCK_CKPT structure, - * update it with the avail list information, and convert it back to a - * data blob. 
- */ - WT_RET(__wt_block_buffer_to_ckpt( - session, block, checkpoint->data, &ci)); - ci.avail.offset = info->offset; - ci.avail.size = info->size; - ci.avail.checksum = info->checksum; - ci.file_size = (wt_off_t)info->file_size; - WT_RET(__wt_buf_extend( - session, checkpoint, WT_BLOCK_CHECKPOINT_BUFFER)); - endp = checkpoint->mem; - WT_RET(__wt_block_ckpt_to_buffer(session, block, &endp, &ci, false)); - checkpoint->size = WT_PTRDIFF(endp, checkpoint->mem); - - if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) - __wt_ckpt_verbose( - session, block, "import replace", NULL, checkpoint->mem); - - return (0); + WT_BLOCK_CKPT ci; + WT_ITEM *checkpoint; + uint8_t *endp; + + memset(&ci, 0, sizeof(ci)); + checkpoint = info->checkpoint; + + if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) + __wt_ckpt_verbose(session, block, "import original", NULL, checkpoint->mem); + + /* + * Convert the final checkpoint data blob to a WT_BLOCK_CKPT structure, update it with the avail + * list information, and convert it back to a data blob. + */ + WT_RET(__wt_block_buffer_to_ckpt(session, block, checkpoint->data, &ci)); + ci.avail.offset = info->offset; + ci.avail.size = info->size; + ci.avail.checksum = info->checksum; + ci.file_size = (wt_off_t)info->file_size; + WT_RET(__wt_buf_extend(session, checkpoint, WT_BLOCK_CHECKPOINT_BUFFER)); + endp = checkpoint->mem; + WT_RET(__wt_block_ckpt_to_buffer(session, block, &endp, &ci, false)); + checkpoint->size = WT_PTRDIFF(endp, checkpoint->mem); + + if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) + __wt_ckpt_verbose(session, block, "import replace", NULL, checkpoint->mem); + + return (0); } -#define WT_BLOCK_SKIP(a) do { \ - if ((a) != 0) \ - continue; \ -} while (0) +#define WT_BLOCK_SKIP(a) \ + do { \ + if ((a) != 0) \ + continue; \ + } while (0) /* * __wt_block_checkpoint_last -- - * Scan a file for checkpoints, returning the last one we find. + * Scan a file for checkpoints, returning the last one we find. 
*/ int -__wt_block_checkpoint_last(WT_SESSION_IMPL *session, WT_BLOCK *block, - char **metadatap, char **checkpoint_listp, WT_ITEM *checkpoint) +__wt_block_checkpoint_last(WT_SESSION_IMPL *session, WT_BLOCK *block, char **metadatap, + char **checkpoint_listp, WT_ITEM *checkpoint) { - struct saved_block_info *best, _best, *current, _current, *saved_tmp; - WT_BLOCK_HEADER *blk; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_FH *fh; - const WT_PAGE_HEADER *dsk; - wt_off_t ext_off, ext_size, offset; - uint64_t len, nblocks, write_gen; - uint32_t checksum, size; - const uint8_t *p, *t; - bool found; - - *metadatap = *checkpoint_listp = NULL; - WT_RET(__wt_buf_init(session, checkpoint, WT_BLOCK_CHECKPOINT_BUFFER)); - - /* - * Initialize a pair of structures that track the best and current - * checkpoints found so far. This is a little trickier than normal - * because we don't want to start saving a checkpoint only to find - * out it's not one we can use. I doubt that can happen and it - * suggests corruption, but half-a-checkpoint isn't a good place to - * be. Only swap to a new "best" checkpoint if we read the whole - * thing successfully. - * - * Don't re-order these lines: it's done this way so the WT_ITEMs - * are always initialized and error handling works. - */ - memset((best = &_best), 0, sizeof(_best)); - memset((current = &_current), 0, sizeof(_current)); - WT_ERR(__wt_scr_alloc(session, 0, &best->checkpoint)); - WT_ERR(__wt_scr_alloc(session, 0, &current->checkpoint)); - - found = false; - ext_off = 0; /* [-Werror=maybe-uninitialized] */ - ext_size = 0; - len = write_gen = 0; - - WT_ERR(__wt_scr_alloc(session, 64 * 1024, &tmp)); - - F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE); - - /* - * Scan the file for pages, using the minimum possible WiredTiger - * allocation size. - */ - fh = block->fh; - for (nblocks = 0, offset = 0; offset < block->size; offset += size) { - /* Report progress occasionally. 
*/ -#define WT_CHECKPOINT_LIST_PROGRESS_INTERVAL 100 - if (++nblocks % WT_CHECKPOINT_LIST_PROGRESS_INTERVAL == 0) - WT_ERR(__wt_progress(session, NULL, nblocks)); - - /* - * Read the start of a possible page and get a block length from - * it. Move to the next allocation sized boundary, we'll never - * consider this one again. - */ - if ((ret = __wt_read(session, fh, - offset, (size_t)WT_BTREE_MIN_ALLOC_SIZE, tmp->mem)) != 0) - break; - blk = WT_BLOCK_HEADER_REF(tmp->mem); - __wt_block_header_byteswap(blk); - size = blk->disk_size; - checksum = blk->checksum; - - /* - * Check the block size: if it's not insane, read the block. - * Reading the block validates any checksum. The file might - * reasonably have garbage at the end, and we're not here to - * detect that. Ignore problems, subsequent file verification - * can deal with any corruption. If the block isn't valid, - * skip to the next possible block. - */ - if (__wt_block_offset_invalid(block, offset, size) || - __wt_block_read_off( - session, block, tmp, offset, size, checksum) != 0) { - size = WT_BTREE_MIN_ALLOC_SIZE; - continue; - } - - dsk = tmp->mem; - if (dsk->type != WT_PAGE_BLOCK_MANAGER) - continue; - - p = WT_BLOCK_HEADER_BYTE(tmp->mem); - WT_BLOCK_SKIP(__wt_extlist_read_pair(&p, &ext_off, &ext_size)); - if (ext_off != WT_BLOCK_EXTLIST_MAGIC || ext_size != 0) - continue; - for (;;) { - if ((ret = __wt_extlist_read_pair( - &p, &ext_off, &ext_size)) != 0) - break; - if (ext_off == WT_BLOCK_INVALID_OFFSET) - break; - } - if (ret != 0) { - WT_NOT_READ(ret, 0); - continue; - } - /* - * Note the less-than check of WT_BLOCK_EXTLIST_VERSION_CKPT, - * that way we can extend this with additional values in the - * future. - */ - if (ext_size < WT_BLOCK_EXTLIST_VERSION_CKPT) - continue; - - /* - * Skip any entries that aren't the most recent we've seen so - * far. 
- */ - WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &write_gen)); - if (write_gen < best->write_gen) - continue; - - __wt_verbose(session, WT_VERB_CHECKPOINT, - "scan: checkpoint block at offset %" PRIuMAX - ", generation #%" PRIu64, - (uintmax_t)offset, write_gen); - - current->write_gen = write_gen; - current->offset = offset; - current->size = size; - current->checksum = checksum; - - /* - * The file size is in a fixed-size chunk of data, although it's - * packed (for portability). - */ - t = p; - WT_BLOCK_SKIP(__wt_vunpack_uint(&t, 0, &current->file_size)); - p += WT_INTPACK64_MAXSIZE; - - /* Save a copy of the metadata. */ - __wt_free(session, current->metadata); - WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len)); - WT_ERR(__wt_strndup(session, p, len, &current->metadata)); - p += len; - - /* Save a copy of the checkpoint list. */ - __wt_free(session, current->checkpoint_list); - WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len)); - WT_ERR(__wt_strndup( - session, p, len, &current->checkpoint_list)); - p += len; - - /* Save a copy of the checkpoint information. */ - WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len)); - WT_ERR(__wt_buf_set(session, current->checkpoint, p, len)); - - /* A new winner, swap the "best" and "current" information. */ - saved_tmp = best; - best = current; - current = saved_tmp; - found = true; - } - - if (!found) - WT_ERR_MSG(session, WT_NOTFOUND, - "%s: no final checkpoint found in file scan", - block->name); - - /* Correct the checkpoint. */ - WT_ERR(__block_checkpoint_update(session, block, best)); - - /* - * Copy the information out to our caller. Do the WT_ITEM first, it's - * the only thing left that can fail and simplifies error handling. 
- */ - WT_ERR(__wt_buf_set(session, - checkpoint, best->checkpoint->data, best->checkpoint->size)); - *metadatap = best->metadata; - best->metadata = NULL; - *checkpoint_listp = best->checkpoint_list; - best->checkpoint_list = NULL; + struct saved_block_info *best, _best, *current, _current, *saved_tmp; + WT_BLOCK_HEADER *blk; + WT_DECL_ITEM(tmp); + WT_DECL_RET; + WT_FH *fh; + const WT_PAGE_HEADER *dsk; + wt_off_t ext_off, ext_size, offset; + uint64_t len, nblocks, write_gen; + uint32_t checksum, size; + const uint8_t *p, *t; + bool found; + + *metadatap = *checkpoint_listp = NULL; + WT_RET(__wt_buf_init(session, checkpoint, WT_BLOCK_CHECKPOINT_BUFFER)); + + /* + * Initialize a pair of structures that track the best and current + * checkpoints found so far. This is a little trickier than normal + * because we don't want to start saving a checkpoint only to find + * out it's not one we can use. I doubt that can happen and it + * suggests corruption, but half-a-checkpoint isn't a good place to + * be. Only swap to a new "best" checkpoint if we read the whole + * thing successfully. + * + * Don't re-order these lines: it's done this way so the WT_ITEMs + * are always initialized and error handling works. + */ + memset((best = &_best), 0, sizeof(_best)); + memset((current = &_current), 0, sizeof(_current)); + WT_ERR(__wt_scr_alloc(session, 0, &best->checkpoint)); + WT_ERR(__wt_scr_alloc(session, 0, &current->checkpoint)); + + found = false; + ext_off = 0; /* [-Werror=maybe-uninitialized] */ + ext_size = 0; + len = write_gen = 0; + + WT_ERR(__wt_scr_alloc(session, 64 * 1024, &tmp)); + + F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE); + + /* + * Scan the file for pages, using the minimum possible WiredTiger allocation size. + */ + fh = block->fh; + for (nblocks = 0, offset = 0; offset < block->size; offset += size) { +/* Report progress occasionally. 
*/ +#define WT_CHECKPOINT_LIST_PROGRESS_INTERVAL 100 + if (++nblocks % WT_CHECKPOINT_LIST_PROGRESS_INTERVAL == 0) + WT_ERR(__wt_progress(session, NULL, nblocks)); + + /* + * Read the start of a possible page and get a block length from it. Move to the next + * allocation sized boundary, we'll never consider this one again. + */ + if ((ret = __wt_read(session, fh, offset, (size_t)WT_BTREE_MIN_ALLOC_SIZE, tmp->mem)) != 0) + break; + blk = WT_BLOCK_HEADER_REF(tmp->mem); + __wt_block_header_byteswap(blk); + size = blk->disk_size; + checksum = blk->checksum; + + /* + * Check the block size: if it's not insane, read the block. Reading the block validates any + * checksum. The file might reasonably have garbage at the end, and we're not here to detect + * that. Ignore problems, subsequent file verification can deal with any corruption. If the + * block isn't valid, skip to the next possible block. + */ + if (__wt_block_offset_invalid(block, offset, size) || + __wt_block_read_off(session, block, tmp, offset, size, checksum) != 0) { + size = WT_BTREE_MIN_ALLOC_SIZE; + continue; + } + + dsk = tmp->mem; + if (dsk->type != WT_PAGE_BLOCK_MANAGER) + continue; + + p = WT_BLOCK_HEADER_BYTE(tmp->mem); + WT_BLOCK_SKIP(__wt_extlist_read_pair(&p, &ext_off, &ext_size)); + if (ext_off != WT_BLOCK_EXTLIST_MAGIC || ext_size != 0) + continue; + for (;;) { + if ((ret = __wt_extlist_read_pair(&p, &ext_off, &ext_size)) != 0) + break; + if (ext_off == WT_BLOCK_INVALID_OFFSET) + break; + } + if (ret != 0) { + WT_NOT_READ(ret, 0); + continue; + } + /* + * Note the less-than check of WT_BLOCK_EXTLIST_VERSION_CKPT, that way we can extend this + * with additional values in the future. + */ + if (ext_size < WT_BLOCK_EXTLIST_VERSION_CKPT) + continue; + + /* + * Skip any entries that aren't the most recent we've seen so far. 
+ */ + WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &write_gen)); + if (write_gen < best->write_gen) + continue; + + __wt_verbose(session, WT_VERB_CHECKPOINT, + "scan: checkpoint block at offset %" PRIuMAX ", generation #%" PRIu64, (uintmax_t)offset, + write_gen); + + current->write_gen = write_gen; + current->offset = offset; + current->size = size; + current->checksum = checksum; + + /* + * The file size is in a fixed-size chunk of data, although it's packed (for portability). + */ + t = p; + WT_BLOCK_SKIP(__wt_vunpack_uint(&t, 0, &current->file_size)); + p += WT_INTPACK64_MAXSIZE; + + /* Save a copy of the metadata. */ + __wt_free(session, current->metadata); + WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len)); + WT_ERR(__wt_strndup(session, p, len, &current->metadata)); + p += len; + + /* Save a copy of the checkpoint list. */ + __wt_free(session, current->checkpoint_list); + WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len)); + WT_ERR(__wt_strndup(session, p, len, &current->checkpoint_list)); + p += len; + + /* Save a copy of the checkpoint information. */ + WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len)); + WT_ERR(__wt_buf_set(session, current->checkpoint, p, len)); + + /* A new winner, swap the "best" and "current" information. */ + saved_tmp = best; + best = current; + current = saved_tmp; + found = true; + } + + if (!found) + WT_ERR_MSG(session, WT_NOTFOUND, "%s: no final checkpoint found in file scan", block->name); + + /* Correct the checkpoint. */ + WT_ERR(__block_checkpoint_update(session, block, best)); + + /* + * Copy the information out to our caller. Do the WT_ITEM first, it's the only thing left that + * can fail and simplifies error handling. 
+ */ + WT_ERR(__wt_buf_set(session, checkpoint, best->checkpoint->data, best->checkpoint->size)); + *metadatap = best->metadata; + best->metadata = NULL; + *checkpoint_listp = best->checkpoint_list; + best->checkpoint_list = NULL; err: - __wt_free(session, best->metadata); - __wt_free(session, best->checkpoint_list); - __wt_scr_free(session, &best->checkpoint); - __wt_free(session, current->metadata); - __wt_free(session, current->checkpoint_list); - __wt_scr_free(session, &current->checkpoint); + __wt_free(session, best->metadata); + __wt_free(session, best->checkpoint_list); + __wt_scr_free(session, &best->checkpoint); + __wt_free(session, current->metadata); + __wt_free(session, current->checkpoint_list); + __wt_scr_free(session, &current->checkpoint); - __wt_scr_free(session, &tmp); + __wt_scr_free(session, &tmp); - F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE); - return (ret); + F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE); + return (ret); } |