summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dist/api_data.py8
-rw-r--r--dist/filelist6
-rw-r--r--dist/s_string.ok11
-rw-r--r--src/block/block_addr.c56
-rw-r--r--src/block/block_ckpt.c708
-rw-r--r--src/block/block_ext.c22
-rw-r--r--src/block/block_mgr.c36
-rw-r--r--src/block/block_open.c4
-rw-r--r--src/block/block_slvg.c14
-rw-r--r--src/block/block_snap.c706
-rw-r--r--src/block/block_vrfy.c244
-rw-r--r--src/block/block_write.c2
-rw-r--r--src/btree/bt_handle.c35
-rw-r--r--src/btree/bt_slvg.c8
-rw-r--r--src/btree/bt_sync.c8
-rw-r--r--src/btree/bt_vrfy.c48
-rw-r--r--src/btree/rec_write.c53
-rw-r--r--src/config/config_def.c27
-rw-r--r--src/conn/conn_api.c2
-rw-r--r--src/conn/conn_btree.c22
-rw-r--r--src/cursor/cur_std.c2
-rw-r--r--src/docs/checkpoints.dox2
-rw-r--r--src/docs/spell.ok2
-rw-r--r--src/docs/upgrading.dox7
-rw-r--r--src/include/api.h20
-rw-r--r--src/include/block.h36
-rw-r--r--src/include/btree.h9
-rw-r--r--src/include/extern.h116
-rw-r--r--src/include/meta.h26
-rw-r--r--src/include/wiredtiger.in10
-rw-r--r--src/include/wt_internal.h8
-rw-r--r--src/meta/meta_api.c20
-rw-r--r--src/meta/meta_ckpt.c411
-rw-r--r--src/meta/meta_snapshot.c410
-rw-r--r--src/meta/meta_track.c6
-rw-r--r--src/schema/schema_truncate.c2
-rw-r--r--src/session/session_btree.c18
-rw-r--r--src/session/session_salvage.c48
-rw-r--r--src/txn/txn.c110
-rw-r--r--src/txn/txn_ckpt.c333
-rw-r--r--src/txn/txn_snapshot.c233
-rw-r--r--src/utilities/util_list.c23
42 files changed, 1938 insertions, 1934 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index b1793cf3eac..3adea9ca157 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -158,8 +158,8 @@ file_config = format_meta + [
# File metadata, including both configurable and non-configurable (internal)
file_meta = file_config + [
- Config('snapshot', '', r'''
- the file snapshot entries'''),
+ Config('checkpoint', '', r'''
+ the file checkpoint entries'''),
Config('version', '(major=0,minor=0)', r'''
the file version'''),
]
@@ -376,6 +376,7 @@ methods = {
list, such as <code>"verbose=[evictserver,read]"</code>''',
type='list', choices=[
'block',
+ 'ckpt',
'evict',
'evictserver',
'fileops',
@@ -385,7 +386,6 @@ methods = {
'readserver',
'reconcile',
'salvage',
- 'snapshot',
'verify',
'write']),
]),
@@ -400,6 +400,7 @@ flags = {
'rec_evict' : [ 'REC_SINGLE' ],
'verbose' : [
'VERB_block',
+ 'VERB_ckpt',
'VERB_evict',
'VERB_evictserver',
'VERB_fileops',
@@ -409,7 +410,6 @@ flags = {
'VERB_readserver',
'VERB_reconcile',
'VERB_salvage',
- 'VERB_snapshot',
'VERB_verify',
'VERB_write'
],
diff --git a/dist/filelist b/dist/filelist
index 70cd5819540..d6baf04c90a 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -4,13 +4,13 @@
src/api/api_strerror.c
src/api/api_version.c
src/block/block_addr.c
+src/block/block_ckpt.c
src/block/block_cksum.c
src/block/block_ext.c
src/block/block_mgr.c
src/block/block_open.c
src/block/block_read.c
src/block/block_slvg.c
-src/block/block_snap.c
src/block/block_vrfy.c
src/block/block_write.c
src/btree/bt_bulk.c
@@ -66,7 +66,7 @@ src/log/log.c
src/log/log_desc.c
src/meta/meta_api.c
src/meta/meta_apply.c
-src/meta/meta_snapshot.c
+src/meta/meta_ckpt.c
src/meta/meta_table.c
src/meta/meta_track.c
src/meta/meta_turtle.c
@@ -116,4 +116,4 @@ src/support/scratch.c
src/support/sess_dump.c
src/support/stat.c
src/txn/txn.c
-src/txn/txn_snapshot.c
+src/txn/txn_ckpt.c
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 59659e7641a..dbb617fe885 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -25,6 +25,7 @@ Btree
CAS
CELL's
CELLs
+CKPT
CLR
COL's
CONCAT
@@ -150,7 +151,7 @@ WIREDTIGER
WinNT
WiredTiger
WiredTiger's
-WiredTigerInternalSnapshot
+WiredTigerInternalCheckpoint
Wuninitialized
XP
__wt_epoch
@@ -197,6 +198,9 @@ checksum
checksums
chk
cip
+ckpt
+ckptfrag
+ckptlist
cksum
clr
cmp
@@ -447,11 +451,6 @@ sizev
skiplist
skiplists
slvg
-snapall
-snapfrag
-snapfrom
-snaplist
-snapto
snprintf
sp
spinlock
diff --git a/src/block/block_addr.c b/src/block/block_addr.c
index 2b5d10ef7b6..68e350e1f12 100644
--- a/src/block/block_addr.c
+++ b/src/block/block_addr.c
@@ -129,68 +129,68 @@ __wt_block_addr_string(WT_SESSION_IMPL *session,
}
/*
- * __wt_block_buffer_to_snapshot --
- * Convert a filesystem snapshot cookie into its components.
+ * __wt_block_buffer_to_ckpt --
+ * Convert a checkpoint cookie into its components.
*/
int
-__wt_block_buffer_to_snapshot(WT_SESSION_IMPL *session,
- WT_BLOCK *block, const uint8_t *p, WT_BLOCK_SNAPSHOT *si)
+__wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session,
+ WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci)
{
uint64_t a;
const uint8_t **pp;
- si->version = *p++;
- if (si->version != WT_BM_SNAPSHOT_VERSION)
- WT_RET_MSG(session, WT_ERROR, "illegal snapshot address");
+ ci->version = *p++;
+ if (ci->version != WT_BM_CHECKPOINT_VERSION)
+ WT_RET_MSG(session, WT_ERROR, "illegal checkpoint address");
pp = &p;
WT_RET(__block_buffer_to_addr(block, pp,
- &si->root_offset, &si->root_size, &si->root_cksum));
+ &ci->root_offset, &ci->root_size, &ci->root_cksum));
WT_RET(__block_buffer_to_addr(block, pp,
- &si->alloc.offset, &si->alloc.size, &si->alloc.cksum));
+ &ci->alloc.offset, &ci->alloc.size, &ci->alloc.cksum));
WT_RET(__block_buffer_to_addr(block, pp,
- &si->avail.offset, &si->avail.size, &si->avail.cksum));
+ &ci->avail.offset, &ci->avail.size, &ci->avail.cksum));
WT_RET(__block_buffer_to_addr(block, pp,
- &si->discard.offset, &si->discard.size, &si->discard.cksum));
+ &ci->discard.offset, &ci->discard.size, &ci->discard.cksum));
WT_RET(__wt_vunpack_uint(pp, 0, &a));
- si->file_size = (off_t)a;
+ ci->file_size = (off_t)a;
WT_RET(__wt_vunpack_uint(pp, 0, &a));
- si->snapshot_size = a;
+ ci->ckpt_size = a;
WT_RET(__wt_vunpack_uint(pp, 0, &a));
- si->write_gen = a;
+ ci->write_gen = a;
return (0);
}
/*
- * __wt_block_snapshot_to_buffer --
- * Convert the filesystem components into its snapshot cookie.
+ * __wt_block_ckpt_to_buffer --
+ * Convert the checkpoint's components into a checkpoint cookie.
*/
int
-__wt_block_snapshot_to_buffer(WT_SESSION_IMPL *session,
- WT_BLOCK *block, uint8_t **pp, WT_BLOCK_SNAPSHOT *si)
+__wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session,
+ WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci)
{
uint64_t a;
- if (si->version != WT_BM_SNAPSHOT_VERSION)
- WT_RET_MSG(session, WT_ERROR, "illegal snapshot address");
+ if (ci->version != WT_BM_CHECKPOINT_VERSION)
+ WT_RET_MSG(session, WT_ERROR, "illegal checkpoint address");
- (*pp)[0] = si->version;
+ (*pp)[0] = ci->version;
(*pp)++;
WT_RET(__wt_block_addr_to_buffer(block, pp,
- si->root_offset, si->root_size, si->root_cksum));
+ ci->root_offset, ci->root_size, ci->root_cksum));
WT_RET(__wt_block_addr_to_buffer(block, pp,
- si->alloc.offset, si->alloc.size, si->alloc.cksum));
+ ci->alloc.offset, ci->alloc.size, ci->alloc.cksum));
WT_RET(__wt_block_addr_to_buffer(block, pp,
- si->avail.offset, si->avail.size, si->avail.cksum));
+ ci->avail.offset, ci->avail.size, ci->avail.cksum));
WT_RET(__wt_block_addr_to_buffer(block, pp,
- si->discard.offset, si->discard.size, si->discard.cksum));
- a = (uint64_t)si->file_size;
+ ci->discard.offset, ci->discard.size, ci->discard.cksum));
+ a = (uint64_t)ci->file_size;
WT_RET(__wt_vpack_uint(pp, 0, a));
- a = (uint64_t)si->snapshot_size;
+ a = (uint64_t)ci->ckpt_size;
WT_RET(__wt_vpack_uint(pp, 0, a));
- a = si->write_gen;
+ a = ci->write_gen;
WT_RET(__wt_vpack_uint(pp, 0, a));
return (0);
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
new file mode 100644
index 00000000000..08e3856facd
--- /dev/null
+++ b/src/block/block_ckpt.c
@@ -0,0 +1,708 @@
+/*-
+ * Copyright (c) 2008-2012 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+static int __ckpt_process(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
+static int __ckpt_string(
+ WT_SESSION_IMPL *, WT_BLOCK *, const uint8_t *, WT_ITEM *);
+static int __ckpt_update(WT_SESSION_IMPL *,
+ WT_BLOCK *, WT_CKPT *, WT_BLOCK_CKPT *, uint64_t, int);
+
+/*
+ * __wt_block_ckpt_init --
+ *	Initialize a checkpoint structure.
+ *
+ *	When is_live is non-zero the structure is being set up as the block's
+ *	live checkpoint: the block's live_load flag is claimed while holding
+ *	live_lock, and the call fails with EINVAL if the flag was already set.
+ *	The structure is then zeroed, its root offset is marked invalid, its
+ *	file size is set to WT_BLOCK_DESC_SECTOR and its four extent lists
+ *	(alloc, avail, discard, ckpt_avail) are initialized with the supplied
+ *	name.  Returns 0 on success or the first error encountered.
+ */
+int
+__wt_block_ckpt_init(WT_SESSION_IMPL *session,
+    WT_BLOCK *block, WT_BLOCK_CKPT *ci, const char *name, int is_live)
+{
+	WT_DECL_RET;
+
+	if (is_live) {
+		/*
+		 * Only one live checkpoint may be loaded at a time.  The btree
+		 * engine is expected to guarantee that, the test is here as a
+		 * safety net.  The error message is reported after the lock is
+		 * released.
+		 */
+		__wt_spin_lock(session, &block->live_lock);
+		if (!block->live_load)
+			block->live_load = 1;
+		else
+			ret = EINVAL;
+		__wt_spin_unlock(session, &block->live_lock);
+
+		if (ret != 0)
+			WT_RET_MSG(
+			    session, EINVAL, "checkpoint already loaded");
+	}
+
+	/* Start from a clean slate: no root page. */
+	memset(ci, 0, sizeof(*ci));
+	ci->root_offset = WT_BLOCK_INVALID_OFFSET;
+
+	/* The extent lists written as part of a checkpoint. */
+	WT_RET(__wt_block_extlist_init(session, &ci->alloc, name, "alloc"));
+	WT_RET(__wt_block_extlist_init(session, &ci->avail, name, "avail"));
+	WT_RET(__wt_block_extlist_init(session, &ci->discard, name, "discard"));
+
+	ci->file_size = WT_BLOCK_DESC_SECTOR;
+
+	/* The list of extents made available by the next checkpoint. */
+	return (__wt_block_extlist_init(
+	    session, &ci->ckpt_avail, name, "ckpt_avail"));
+}
+
+/*
+ * __wt_block_checkpoint_load --
+ *	Load a checkpoint.
+ *
+ *	The block's live checkpoint structure is initialized and, if a
+ *	checkpoint cookie is supplied (addr is non-NULL and addr_size is
+ *	non-zero), the cookie is cracked into it and any root page it
+ *	references is read into dsk.  If there is no root page, dsk->size is
+ *	left at 0.  When readonly is zero the avail extent list is also read
+ *	and the file is truncated to the checkpoint's recorded file size.
+ *
+ *	Returns 0 on success.  On any failure after the live checkpoint has
+ *	been initialized, the checkpoint is unloaded before returning the
+ *	error.
+ */
+int
+__wt_block_checkpoint_load(WT_SESSION_IMPL *session,
+    WT_BLOCK *block, WT_ITEM *dsk, const uint8_t *addr, uint32_t addr_size,
+    int readonly)
+{
+	WT_BLOCK_CKPT *ci;
+	WT_DECL_ITEM(tmp);
+	WT_DECL_RET;
+
+	/*
+	 * Sometimes we don't find a root page (we weren't given a checkpoint,
+	 * or the referenced checkpoint was empty).  In that case we return a
+	 * root page size of 0.  Set that up now.
+	 */
+	dsk->size = 0;
+
+	/*
+	 * Nothing to clean up if this fails: a plain return keeps us from
+	 * unloading a live checkpoint that some other caller loaded.
+	 */
+	ci = &block->live;
+	WT_RET(__wt_block_ckpt_init(session, block, ci, "live", 1));
+
+	if (WT_VERBOSE_ISSET(session, ckpt)) {
+		if (addr != NULL) {
+			WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+			WT_ERR(__ckpt_string(session, block, addr, tmp));
+		}
+		WT_VERBOSE_ERR(session, ckpt,
+		    "%s: load-checkpoint: %s", block->name,
+		    addr == NULL ? "[Empty]" : (char *)tmp->data);
+	}
+
+	/*
+	 * If not loading a checkpoint from disk, we're done.  Leave through
+	 * the common exit instead of returning directly: the verbose code
+	 * above may have allocated the scratch buffer and it has to be
+	 * released.
+	 */
+	if (addr == NULL || addr_size == 0)
+		goto done;
+
+	/* Crack the checkpoint cookie. */
+	WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));
+
+	/* Verify sets up next. */
+	if (block->verify)
+		WT_ERR(__wt_verify_ckpt_load(session, block, ci));
+
+	/* Read, and optionally verify, any root page. */
+	if (ci->root_offset != WT_BLOCK_INVALID_OFFSET) {
+		WT_ERR(__wt_block_read_off(session, block,
+		    dsk, ci->root_offset, ci->root_size, ci->root_cksum));
+		if (block->verify) {
+			/* Reuse the verbose string if we already built it. */
+			if (tmp == NULL) {
+				WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+				WT_ERR(__ckpt_string(
+				    session, block, addr, tmp));
+			}
+			WT_ERR(
+			    __wt_verify_dsk(session, (char *)tmp->data, dsk));
+		}
+	}
+
+	/*
+	 * Rolling a checkpoint forward requires the avail list, the blocks from
+	 * which we can allocate.
+	 */
+	if (!readonly)
+		WT_ERR(__wt_block_extlist_read(session, block, &ci->avail));
+
+	/*
+	 * If the checkpoint can be written, that means anything written after
+	 * the checkpoint is no longer interesting.  Truncate the file.
+	 */
+	if (!readonly) {
+		WT_VERBOSE_ERR(session, ckpt,
+		    "truncate file to %" PRIuMAX, (uintmax_t)ci->file_size);
+		WT_ERR(__wt_ftruncate(session, block->fh, ci->file_size));
+	}
+
+	/* The error path unloads the checkpoint, success skips over it. */
+	if (0) {
+err:		(void)__wt_block_checkpoint_unload(session, block);
+	}
+
+done:	__wt_scr_free(&tmp);
+	return (ret);
+}
+
+/*
+ * __wt_block_checkpoint_unload --
+ *	Unload the block's live checkpoint.
+ *
+ *	Runs the verify cleanup when the block is being verified, discards the
+ *	live checkpoint's extent lists and clears the block's live_load flag.
+ *	The extent lists are discarded and the flag cleared even if an earlier
+ *	step failed; the accumulated ret value is returned.
+ */
+int
+__wt_block_checkpoint_unload(WT_SESSION_IMPL *session, WT_BLOCK *block)
+{
+	WT_BLOCK_CKPT *live;
+	WT_DECL_RET;
+
+	live = &block->live;
+
+	WT_VERBOSE_RETVAL(
+	    session, ckpt, ret, "%s: unload checkpoint", block->name);
+
+	/* Give verify a chance to clean up after itself. */
+	if (block->verify)
+		WT_TRET(__wt_verify_ckpt_unload(session, block, live));
+
+	/* Release the extent lists, then mark the live slot as free. */
+	__wt_block_ckpt_destroy(session, live);
+	block->live_load = 0;
+
+	return (ret);
+}
+
+/*
+ * __wt_block_ckpt_destroy --
+ *	Clear a checkpoint structure.
+ *
+ *	Frees the structure's four extent lists (alloc, avail, discard and
+ *	ckpt_avail, in that order).  The structure itself is not freed.
+ */
+void
+__wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci)
+{
+	WT_EXTLIST *lists[4];
+	unsigned int i;
+
+	/* Collect the extent lists, then discard each in turn. */
+	lists[0] = &ci->alloc;
+	lists[1] = &ci->avail;
+	lists[2] = &ci->discard;
+	lists[3] = &ci->ckpt_avail;
+
+	for (i = 0; i < sizeof(lists) / sizeof(lists[0]); ++i)
+		__wt_block_extlist_free(session, lists[i]);
+}
+
+/*
+ * __wt_block_checkpoint --
+ *	Create a new checkpoint.
+ *
+ *	Writes the root page held in buf (a NULL buf means an empty tree),
+ *	processes the checkpoint list in ckptbase and, unless the connection
+ *	has WT_CONN_NOSYNC set, flushes the file.  Returns 0 on success or the
+ *	first error encountered.
+ */
+int
+__wt_block_checkpoint(WT_SESSION_IMPL *session,
+    WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase)
+{
+	WT_BLOCK_CKPT *live;
+
+	live = &block->live;
+	live->version = WT_BM_CHECKPOINT_VERSION;
+
+	/*
+	 * Write the root page.  A checkpoint of an empty tree is legal, it is
+	 * recorded by storing an illegal root offset.
+	 *
+	 * XXX
+	 * We happen to know that checkpoints are single-threaded above us in
+	 * the btree engine.  That's probably something we want to guarantee
+	 * for any WiredTiger block manager.
+	 */
+	if (buf != NULL) {
+		WT_RET(__wt_block_write_off(session, block, buf,
+		    &live->root_offset,
+		    &live->root_size, &live->root_cksum, 0));
+	} else {
+		live->root_offset = WT_BLOCK_INVALID_OFFSET;
+		live->root_size = live->root_cksum = 0;
+	}
+
+	/* Walk the checkpoint list, deleting and updating as required. */
+	WT_RET(__ckpt_process(session, block, ckptbase));
+
+	/*
+	 * Checkpoints have to hit disk (lazy checkpoints would be a reasonable
+	 * configuration, but they aren't supported yet).  No locks are held at
+	 * this point, other writers can proceed while we wait.
+	 */
+	if (F_ISSET(S2C(session), WT_CONN_NOSYNC))
+		return (0);
+	return (__wt_fsync(session, block->fh));
+}
+
+/*
+ * __ckpt_extlist_fblocks --
+ *	Return the file space holding an extent list to the live system's avail
+ *	list.  An extent list with an invalid offset was never read from disk
+ *	and has no space to return.
+ */
+static inline int
+__ckpt_extlist_fblocks(
+    WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
+{
+	if (el->offset != WT_BLOCK_INVALID_OFFSET)
+		return (__wt_block_insert_ext(
+		    session, &block->live.avail, el->offset, el->size));
+
+	return (0);
+}
+
+/*
+ * __ckpt_process --
+ *	Process the list of checkpoints.
+ *
+ *	Walks the array starting at ckptbase: checkpoints flagged
+ *	WT_CKPT_DELETE are merged into the checkpoint that follows them (or
+ *	into the live system), surviving checkpoints changed by such a merge
+ *	are flagged WT_CKPT_UPDATE and re-written, and the entry flagged
+ *	WT_CKPT_ADD is written from the live system.  Per-checkpoint block
+ *	manager state is hung off each entry's bpriv field while this runs.
+ *
+ *	Returns 0 on success or the first error encountered; extent lists
+ *	loaded here are discarded before returning in either case.
+ */
+static int
+__ckpt_process(
+    WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
+{
+	WT_BLOCK_CKPT *a, *b, *ci;
+	WT_CKPT *ckpt;
+	WT_DECL_ITEM(tmp);
+	WT_DECL_RET;
+	uint64_t ckpt_size;
+	int deleting, locked;
+
+	ci = &block->live;
+	locked = 0;
+
+	/*
+	 * We've allocated our last page, update the checkpoint size.  We need
+	 * to calculate the live system's checkpoint size before reading and
+	 * merging checkpoint allocation and discard information from the
+	 * checkpoints we're deleting, those operations change the underlying
+	 * byte counts.
+	 */
+	ckpt_size = ci->ckpt_size;
+	ckpt_size += ci->alloc.bytes;
+	ckpt_size -= ci->discard.bytes;
+
+	/*
+	 * Extents newly available as a result of deleting previous checkpoints
+	 * are added to a list of extents.  The list should be empty, but there
+	 * is no explicit "free the checkpoint information" call into the block
+	 * manager; if there was an error in an upper level resulting in some
+	 * previous checkpoint never being resolved, the list may not be empty.
+	 *
+	 * XXX
+	 * This isn't sufficient, actually: we're going to leak all the blocks
+	 * written as part of the last checkpoint because it was never resolved.
+	 */
+	__wt_block_extlist_free(session, &ci->ckpt_avail);
+	WT_RET(__wt_block_extlist_init(
+	    session, &ci->ckpt_avail, "live", "ckpt_avail"));
+
+	/*
+	 * To delete a checkpoint, we'll need the extent lists for it, and we
+	 * have to read those from the disk.
+	 */
+	deleting = 0;
+	WT_CKPT_FOREACH(ckptbase, ckpt) {
+		/*
+		 * To delete a checkpoint, we'll need checkpoint information for
+		 * it and the subsequent checkpoint.  The test is tricky, load
+		 * the current checkpoint's information if it's marked for
+		 * deletion, or if it follows a checkpoint marked for deletion,
+		 * where the boundary cases are the first checkpoint in the list
+		 * and the last checkpoint in the list: if we're deleting the
+		 * last checkpoint in the list, there's no next checkpoint, the
+		 * checkpoint will be merged into the live tree.
+		 */
+		if (!F_ISSET(ckpt, WT_CKPT_DELETE) &&
+		    (ckpt == ckptbase ||
+		    F_ISSET(ckpt, WT_CKPT_ADD) ||
+		    !F_ISSET(ckpt - 1, WT_CKPT_DELETE)))
+			continue;
+		deleting = 1;
+
+		/*
+		 * Allocate a checkpoint structure, crack the cookie and read
+		 * the checkpoint's extent lists.
+		 *
+		 * Ignore the avail list: checkpoint avail lists are only useful
+		 * if we are rolling forward from the particular checkpoint and
+		 * they represent our best understanding of what blocks can be
+		 * allocated.  If we are not operating on the live checkpoint,
+		 * subsequent checkpoints might have allocated those blocks, and
+		 * the avail list is useless.  We don't discard it, because it
+		 * is useful as part of verification, but we don't re-write it
+		 * either.
+		 */
+		WT_ERR(__wt_calloc(
+		    session, 1, sizeof(WT_BLOCK_CKPT), &ckpt->bpriv));
+		ci = ckpt->bpriv;
+		WT_ERR(__wt_block_ckpt_init(session, block, ci, ckpt->name, 0));
+		WT_ERR(__wt_block_buffer_to_ckpt(
+		    session, block, ckpt->raw.data, ci));
+		WT_ERR(__wt_block_extlist_read(session, block, &ci->alloc));
+		WT_ERR(__wt_block_extlist_read(session, block, &ci->discard));
+	}
+
+	/*
+	 * Hold a lock so the live extent lists and the file size can't change
+	 * underneath us.  I suspect we'll tighten this if checkpoints take too
+	 * much time away from real work: we read the historic checkpoint
+	 * information without a lock, but we could also merge and re-write the
+	 * delete checkpoint information without a lock, except for ranges
+	 * merged into the live tree.
+	 */
+	__wt_spin_lock(session, &block->live_lock);
+	locked = 1;
+
+	/* Skip the additional processing if we aren't deleting checkpoints. */
+	if (!deleting)
+		goto live_update;
+
+	/*
+	 * Delete any no-longer-needed checkpoints: we do this first as it frees
+	 * blocks to the live lists, and the freed blocks will then be included
+	 * when writing the live extent lists.
+	 */
+	WT_CKPT_FOREACH(ckptbase, ckpt) {
+		if (!F_ISSET(ckpt, WT_CKPT_DELETE))
+			continue;
+
+		if (WT_VERBOSE_ISSET(session, ckpt)) {
+			if (tmp == NULL)
+				WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+			WT_ERR(__ckpt_string(
+			    session, block, ckpt->raw.data, tmp));
+			WT_VERBOSE_ERR(session, ckpt,
+			    "%s: delete-checkpoint: %s: %s",
+			    block->name, ckpt->name, (char *)tmp->data);
+		}
+
+		/*
+		 * Set the from/to checkpoint structures, where the "to" value
+		 * may be the live tree.
+		 */
+		a = ckpt->bpriv;
+		if (F_ISSET(ckpt + 1, WT_CKPT_ADD))
+			b = &block->live;
+		else
+			b = (ckpt + 1)->bpriv;
+
+		/*
+		 * Free the root page: there's nothing special about this free,
+		 * the root page is allocated using normal rules, that is, it
+		 * may have been taken from the avail list, and was entered on
+		 * the live system's alloc list at that time.  We free it into
+		 * the checkpoint's discard list, however, not the live system's
+		 * list because it appears on the checkpoint's alloc list and so
+		 * must be paired in the checkpoint.
+		 */
+		if (a->root_offset != WT_BLOCK_INVALID_OFFSET)
+			WT_ERR(__wt_block_insert_ext(session,
+			    &a->discard, a->root_offset, a->root_size));
+
+		/*
+		 * Free the blocks used to hold the "from" checkpoint's extent
+		 * lists directly to the live system's avail list, they were
+		 * never on any alloc list.   Include the "from" checkpoint's
+		 * avail list, it's going away.
+		 */
+		WT_ERR(__ckpt_extlist_fblocks(session, block, &a->alloc));
+		WT_ERR(__ckpt_extlist_fblocks(session, block, &a->avail));
+		WT_ERR(__ckpt_extlist_fblocks(session, block, &a->discard));
+
+		/*
+		 * Roll the "from" alloc and discard extent lists into the "to"
+		 * checkpoint's lists.
+		 */
+		if (a->alloc.entries != 0)
+			WT_ERR(__wt_block_extlist_merge(
+			    session, &a->alloc, &b->alloc));
+		if (a->discard.entries != 0)
+			WT_ERR(__wt_block_extlist_merge(
+			    session, &a->discard, &b->discard));
+
+		/*
+		 * If the "to" checkpoint is also being deleted, we're done with
+		 * it, it's merged into some other checkpoint in the next loop.
+		 * This means the extent lists may aggregate over a number of
+		 * checkpoints, but that's OK, they're disjoint sets of ranges.
+		 */
+		if (F_ISSET(ckpt + 1, WT_CKPT_DELETE))
+			continue;
+
+		/*
+		 * Find blocks for re-use: wherever the "to" checkpoint's
+		 * allocate and discard lists overlap is fair game, move ranges
+		 * appearing on both lists to the live checkpoint's newly
+		 * available list.
+		 */
+		WT_ERR(__wt_block_extlist_overlap(session, block, b));
+
+		/*
+		 * If we're updating the live system's information, we're done.
+		 */
+		if (F_ISSET(ckpt + 1, WT_CKPT_ADD))
+			continue;
+
+		/*
+		 * We have to write the "to" checkpoint's extent lists out in
+		 * new blocks, and update its cookie.
+		 *
+		 * Free the blocks used to hold the "to" checkpoint's extent
+		 * lists directly to the live system's avail list, they were
+		 * never on any alloc list.  Don't include the "to" checkpoint's
+		 * avail list, it's not changing.
+		 */
+		WT_ERR(__ckpt_extlist_fblocks(session, block, &b->alloc));
+		WT_ERR(__ckpt_extlist_fblocks(session, block, &b->discard));
+
+		F_SET(ckpt + 1, WT_CKPT_UPDATE);
+	}
+
+	/* Update checkpoints marked for update. */
+	WT_CKPT_FOREACH(ckptbase, ckpt)
+		if (F_ISSET(ckpt, WT_CKPT_UPDATE)) {
+			WT_ASSERT(session, !F_ISSET(ckpt, WT_CKPT_ADD));
+			WT_ERR(__ckpt_update(
+			    session, block, ckpt, ckpt->bpriv, 0, 0));
+		}
+
+live_update:
+	ci = &block->live;
+
+	/* Truncate the file if that's possible. */
+	WT_ERR(__wt_block_extlist_truncate(session, block, &ci->avail));
+
+	/* Update the final, added checkpoint based on the live system. */
+	WT_CKPT_FOREACH(ckptbase, ckpt)
+		if (F_ISSET(ckpt, WT_CKPT_ADD)) {
+			WT_ERR(__ckpt_update(
+			    session, block, ckpt, ci, ckpt_size, 1));
+
+			/*
+			 * XXX
+			 * Our caller wants two pieces of information: the time
+			 * the checkpoint was written and the final checkpoint
+			 * size.  This violates layering but the alternative is
+			 * a call for the btree layer to crack the checkpoint
+			 * cookie into its components, and that's a fair amount
+			 * of work.  (We could just read the system time in the
+			 * session layer when updating the metadata file, but
+			 * that won't work for the checkpoint size, and so we
+			 * do both here.)
+			 */
+			ckpt->ckpt_size = ci->ckpt_size;
+			WT_ERR(__wt_epoch(session, &ckpt->sec, NULL));
+		}
+
+	/*
+	 * Reset the live system's alloc and discard extent lists, leave the
+	 * avail list alone.
+	 */
+	__wt_block_extlist_free(session, &ci->alloc);
+	WT_ERR(__wt_block_extlist_init(session, &ci->alloc, "live", "alloc"));
+	__wt_block_extlist_free(session, &ci->discard);
+	WT_ERR(
+	    __wt_block_extlist_init(session, &ci->discard, "live", "discard"));
+
+#ifdef HAVE_DIAGNOSTIC
+	/*
+	 * The first checkpoint in the system should always have an empty
+	 * discard list.  If we've read that checkpoint and/or created it,
+	 * check.
+	 */
+	WT_CKPT_FOREACH(ckptbase, ckpt)
+		if (!F_ISSET(ckpt, WT_CKPT_DELETE))
+			break;
+	if ((a = ckpt->bpriv) == NULL)
+		a = &block->live;
+	if (a->discard.entries != 0) {
+		__wt_errx(session,
+		    "checkpoint incorrectly has blocks on the discard list");
+		WT_ERR(WT_ERROR);
+	}
+#endif
+
+err:	if (locked)
+		__wt_spin_unlock(session, &block->live_lock);
+
+	/*
+	 * Discard any checkpoint information we loaded.  Every structure was
+	 * set up by __wt_block_ckpt_init, which initializes four extent lists,
+	 * so use the matching destroy call to release all four of them,
+	 * including ckpt_avail.  Only the lists are released here, the bpriv
+	 * memory itself is left attached to the checkpoint entry.
+	 */
+	WT_CKPT_FOREACH(ckptbase, ckpt)
+		if ((ci = ckpt->bpriv) != NULL)
+			__wt_block_ckpt_destroy(session, ci);
+
+	__wt_scr_free(&tmp);
+	return (ret);
+}
+
+/*
+ * __ckpt_update --
+ *	Update a checkpoint.
+ *
+ *	Writes the extent lists of the checkpoint described by ci and stores
+ *	the resulting cookie in ckpt->raw.  When is_live is non-zero, ci is the
+ *	live system: its avail list is written too, and its file size and
+ *	checkpoint size (ckpt_size) are refreshed before the cookie is built.
+ *	Returns 0 on success or the first error encountered.
+ */
+static int
+__ckpt_update(
+    WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt,
+    WT_BLOCK_CKPT *ci, uint64_t ckpt_size, int is_live)
+{
+	WT_DECL_ITEM(str);
+	WT_DECL_RET;
+	uint8_t *p;
+
+#ifdef HAVE_DIAGNOSTIC
+	/* No pair of extent lists may overlap. */
+	WT_RET(__wt_block_extlist_check(session, &ci->alloc, &ci->avail));
+	WT_RET(__wt_block_extlist_check(session, &ci->discard, &ci->avail));
+	WT_RET(__wt_block_extlist_check(session, &ci->alloc, &ci->discard));
+#endif
+	/*
+	 * Write the checkpoint's extent lists.  An avail list is only written
+	 * for the live system, the avail lists of other checkpoints are static
+	 * and never change.  The live system's avail list is written as two
+	 * lists: the current avail list plus the blocks being made available
+	 * as of the new checkpoint.  The second list can't be merged into the
+	 * real list yet, those blocks aren't truly available until the new
+	 * checkpoint location has been saved to the metadata.
+	 */
+	WT_RET(__wt_block_extlist_write(session, block, &ci->alloc, NULL));
+	if (is_live)
+		WT_RET(__wt_block_extlist_write(
+		    session, block, &ci->avail, &ci->ckpt_avail));
+	WT_RET(__wt_block_extlist_write(session, block, &ci->discard, NULL));
+
+	/*
+	 * Set the file size and the checkpoint size for the live system.
+	 *
+	 * XXX
+	 * We do NOT set the file size when re-writing checkpoints because we
+	 * want to test the checkpoint's blocks against a reasonable maximum
+	 * file size during verification.  This is bad: imagine a checkpoint
+	 * appearing early in the file, re-written, and then the checkpoint
+	 * requires blocks at the end of the file, blocks after the listed file
+	 * size.  If the application opens that checkpoint for writing
+	 * (discarding subsequent checkpoints), we would truncate the file to
+	 * the early chunk, discarding the re-written checkpoint information.
+	 * The alternative, updating the file size has its own problems, in
+	 * that case we'd work correctly, but we'd lose all of the blocks
+	 * between the original checkpoint and the re-written checkpoint.
+	 * Currently, there's no API to roll-forward intermediate checkpoints,
+	 * if there ever is, this will need to be fixed.
+	 */
+	if (is_live) {
+		WT_RET(__wt_filesize(session, block->fh, &ci->file_size));
+		ci->ckpt_size = ckpt_size;
+	}
+
+	/*
+	 * Pack the checkpoint information into the checkpoint array's address
+	 * cookie; the cookie's size is however far the pack moved the pointer.
+	 */
+	WT_RET(__wt_buf_init(session, &ckpt->raw, WT_BTREE_MAX_ADDR_COOKIE));
+	p = ckpt->raw.mem;
+	WT_RET(__wt_block_ckpt_to_buffer(session, block, &p, ci));
+	ckpt->raw.size = WT_PTRDIFF32(p, ckpt->raw.mem);
+
+	if (WT_VERBOSE_ISSET(session, ckpt)) {
+		WT_RET(__wt_scr_alloc(session, 0, &str));
+		WT_ERR(__ckpt_string(session, block, ckpt->raw.data, str));
+		WT_VERBOSE_ERR(session, ckpt,
+		    "%s: create-checkpoint: %s: %s",
+		    block->name, ckpt->name, (char *)str->data);
+	}
+
+err:	__wt_scr_free(&str);
+	return (ret);
+}
+
+/*
+ * __wt_block_checkpoint_resolve --
+ *	Resolve a checkpoint.
+ *
+ *	Merges the live system's ckpt_avail list into its avail list while
+ *	holding live_lock, then discards the ckpt_avail list.  Returns the
+ *	result of the merge.
+ */
+int
+__wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block)
+{
+	WT_BLOCK_CKPT *live;
+	WT_DECL_RET;
+
+	live = &block->live;
+
+	/*
+	 * Checkpoints are a two-step process.  In the first step a new
+	 * checkpoint is written to disk (including all the new extent lists
+	 * for modified checkpoints and the live system), and a list of file
+	 * blocks newly available for reallocation is built from the
+	 * checkpoints being deleted.  The locations of the new checkpoint
+	 * information are returned to our caller, who has to write them into
+	 * some kind of stable storage.
+	 *
+	 * Blocks on the newly available list can't be allocated until that has
+	 * happened: if an allocation were made first and we crashed before the
+	 * new checkpoint location was saved, we'd have overwritten blocks
+	 * still referenced by checkpoints in the system.
+	 *
+	 * This is the second step: the caller has saved the checkpoint
+	 * information, add the newly available blocks to the live system's
+	 * available list.
+	 */
+	__wt_spin_lock(session, &block->live_lock);
+	ret = __wt_block_extlist_merge(
+	    session, &live->ckpt_avail, &live->avail);
+	__wt_spin_unlock(session, &block->live_lock);
+
+	/* The list has been merged (or the merge failed), discard it. */
+	__wt_block_extlist_free(session, &live->ckpt_avail);
+
+	return (ret);
+}
+
+/*
+ * __ckpt_string --
+ *	Return a printable string representation of a checkpoint address cookie.
+ *
+ *	The cookie at addr is cracked into a temporary checkpoint structure and
+ *	formatted into buf: version, root page, alloc/avail/discard extent
+ *	list locations, file size and write generation.  Returns 0 on success
+ *	or the first error encountered; the temporary structure is discarded
+ *	in either case.
+ */
+static int
+__ckpt_string(WT_SESSION_IMPL *session,
+    WT_BLOCK *block, const uint8_t *addr, WT_ITEM *buf)
+{
+	WT_BLOCK_CKPT *ci, _ci;
+	WT_DECL_RET;
+
+	/*
+	 * Initialize the checkpoint, crack the cookie.  A non-live init zeroes
+	 * the structure before doing anything that can fail, so it's safe to
+	 * hand the structure to the destroy call no matter where we fail.
+	 */
+	ci = &_ci;
+	WT_ERR(__wt_block_ckpt_init(session, block, ci, "string", 0));
+	WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));
+
+	WT_ERR(__wt_buf_fmt(session, buf,
+	    "version=%d",
+	    ci->version));
+	if (ci->root_offset == WT_BLOCK_INVALID_OFFSET)
+		WT_ERR(__wt_buf_catfmt(session, buf, ", root=[Empty]"));
+	else
+		WT_ERR(__wt_buf_catfmt(session, buf,
+		    ", root=[%"
+		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+		    (uintmax_t)ci->root_offset,
+		    (uintmax_t)(ci->root_offset + ci->root_size),
+		    ci->root_size, ci->root_cksum));
+	if (ci->alloc.offset == WT_BLOCK_INVALID_OFFSET)
+		WT_ERR(__wt_buf_catfmt(session, buf, ", alloc=[Empty]"));
+	else
+		WT_ERR(__wt_buf_catfmt(session, buf,
+		    ", alloc=[%"
+		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+		    (uintmax_t)ci->alloc.offset,
+		    (uintmax_t)(ci->alloc.offset + ci->alloc.size),
+		    ci->alloc.size, ci->alloc.cksum));
+	if (ci->avail.offset == WT_BLOCK_INVALID_OFFSET)
+		WT_ERR(__wt_buf_catfmt(session, buf, ", avail=[Empty]"));
+	else
+		WT_ERR(__wt_buf_catfmt(session, buf,
+		    ", avail=[%"
+		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+		    (uintmax_t)ci->avail.offset,
+		    (uintmax_t)(ci->avail.offset + ci->avail.size),
+		    ci->avail.size, ci->avail.cksum));
+	if (ci->discard.offset == WT_BLOCK_INVALID_OFFSET)
+		WT_ERR(__wt_buf_catfmt(session, buf, ", discard=[Empty]"));
+	else
+		WT_ERR(__wt_buf_catfmt(session, buf,
+		    ", discard=[%"
+		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+		    (uintmax_t)ci->discard.offset,
+		    (uintmax_t)(ci->discard.offset + ci->discard.size),
+		    ci->discard.size, ci->discard.cksum));
+	WT_ERR(__wt_buf_catfmt(session, buf,
+	    ", file size=%" PRIuMAX
+	    ", write generation=%" PRIu64,
+	    (uintmax_t)ci->file_size,
+	    ci->write_gen));
+
+	/*
+	 * The checkpoint structure was only needed to crack the cookie: pair
+	 * the init call above with a destroy so its extent lists are released
+	 * on success and failure alike.
+	 */
+err:	__wt_block_ckpt_destroy(session, ci);
+	return (ret);
+}
diff --git a/src/block/block_ext.c b/src/block/block_ext.c
index b102bc54899..7ab2a6fa590 100644
--- a/src/block/block_ext.c
+++ b/src/block/block_ext.c
@@ -471,9 +471,9 @@ __wt_block_off_free(
* locks required to manipulate the extent lists.
*
* We can reuse this extent immediately if it was allocated during this
- * snapshot, merge it into the avail list (which slows file growth in
+ * checkpoint, merge it into the avail list (which slows file growth in
* workloads including repeated overflow record modification). If this
- * extent is referenced in a previous snapshot, merge into the discard
+ * extent is referenced in a previous checkpoint, merge into the discard
* list.
*/
if ((ret = __wt_block_off_remove_overlap(
@@ -515,7 +515,7 @@ __wt_block_extlist_check(
continue;
}
WT_RET_MSG(session, EINVAL,
- "snapshot merge check: %s list overlaps the %s list",
+ "checkpoint merge check: %s list overlaps the %s list",
al->name, bl->name);
}
return (0);
@@ -524,17 +524,17 @@ __wt_block_extlist_check(
/*
* __wt_block_extlist_overlap --
- * Review a snapshot's alloc/discard extent lists, move overlaps into the
- * live system's snapshot-avail list.
+ * Review a checkpoint's alloc/discard extent lists, move overlaps into the
+ * live system's checkpoint-avail list.
*/
int
__wt_block_extlist_overlap(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_SNAPSHOT *si)
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
{
WT_EXT *alloc, *discard;
- alloc = si->alloc.off[0];
- discard = si->discard.off[0];
+ alloc = ci->alloc.off[0];
+ discard = ci->discard.off[0];
/* Walk the lists in parallel, looking for overlaps. */
while (alloc != NULL && discard != NULL) {
@@ -553,7 +553,7 @@ __wt_block_extlist_overlap(
/* Reconcile the overlap. */
WT_RET(__block_ext_overlap(session, block,
- &si->alloc, &alloc, &si->discard, &discard));
+ &ci->alloc, &alloc, &ci->discard, &discard));
}
return (0);
}
@@ -570,7 +570,7 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
WT_EXTLIST *avail, *el;
off_t off, size;
- avail = &block->live.snapshot_avail;
+ avail = &block->live.ckpt_avail;
/*
* The ranges overlap, choose the range we're going to take from each.
@@ -926,7 +926,7 @@ corrupted: WT_ERR_MSG(session, WT_ERROR,
* We could insert instead of merge, because ranges shouldn't
* overlap, but merge knows how to allocate WT_EXT structures,
* and a little paranoia is a good thing (if we corrupted the
- * list and crashed, and rolled back to a corrupted snapshot,
+ * list and crashed, and rolled back to a corrupted checkpoint,
* this might save us?)
*/
WT_ERR(__block_merge(session, el, off, size));
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index b1bc28766f6..e54e01190d4 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -132,41 +132,41 @@ __wt_bm_close(WT_SESSION_IMPL *session)
}
/*
- * __wt_bm_snapshot --
- * Write a buffer into a block, creating a snapshot.
+ * __wt_bm_checkpoint --
+ * Write a buffer into a block, creating a checkpoint.
*/
int
-__wt_bm_snapshot(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_SNAPSHOT *snapbase)
+__wt_bm_checkpoint(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckptbase)
{
WT_BLOCK *block;
if ((block = session->btree->block) == NULL)
return (__bm_invalid(session));
- return (__wt_block_snapshot(session, block, buf, snapbase));
+ return (__wt_block_checkpoint(session, block, buf, ckptbase));
}
/*
- * __wt_bm_snapshot_resolve --
- * Resolve the snapshot.
+ * __wt_bm_checkpoint_resolve --
+ * Resolve the checkpoint.
*/
int
-__wt_bm_snapshot_resolve(WT_SESSION_IMPL *session, WT_SNAPSHOT *snapbase)
+__wt_bm_checkpoint_resolve(WT_SESSION_IMPL *session)
{
WT_BLOCK *block;
if ((block = session->btree->block) == NULL)
return (__bm_invalid(session));
- return (__wt_block_snapshot_resolve(session, block, snapbase));
+ return (__wt_block_checkpoint_resolve(session, block));
}
/*
- * __wt_bm_snapshot_load --
- * Load a snapshot point.
+ * __wt_bm_checkpoint_load --
+ * Load a checkpoint.
*/
int
-__wt_bm_snapshot_load(WT_SESSION_IMPL *session,
+__wt_bm_checkpoint_load(WT_SESSION_IMPL *session,
WT_ITEM *buf, const uint8_t *addr, uint32_t addr_size, int readonly)
{
WT_BLOCK *block;
@@ -174,23 +174,23 @@ __wt_bm_snapshot_load(WT_SESSION_IMPL *session,
if ((block = session->btree->block) == NULL)
return (__bm_invalid(session));
- return (__wt_block_snapshot_load(
+ return (__wt_block_checkpoint_load(
session, block, buf, addr, addr_size, readonly));
}
/*
- * __wt_bm_snapshot_unload --
- * Unload a snapshot point.
+ * __wt_bm_checkpoint_unload --
+ * Unload a checkpoint.
*/
int
-__wt_bm_snapshot_unload(WT_SESSION_IMPL *session)
+__wt_bm_checkpoint_unload(WT_SESSION_IMPL *session)
{
WT_BLOCK *block;
if ((block = session->btree->block) == NULL)
return (__bm_invalid(session));
- return (__wt_block_snapshot_unload(session, block));
+ return (__wt_block_checkpoint_unload(session, block));
}
/*
@@ -333,14 +333,14 @@ __wt_bm_salvage_end(WT_SESSION_IMPL *session)
* Start a block manager salvage.
*/
int
-__wt_bm_verify_start(WT_SESSION_IMPL *session, WT_SNAPSHOT *snapbase)
+__wt_bm_verify_start(WT_SESSION_IMPL *session, WT_CKPT *ckptbase)
{
WT_BLOCK *block;
if ((block = session->btree->block) == NULL)
return (__bm_invalid(session));
- return (__wt_block_verify_start(session, block, snapbase));
+ return (__wt_block_verify_start(session, block, ckptbase));
}
/*
diff --git a/src/block/block_open.c b/src/block/block_open.c
index f00e333b735..af4015ed3bb 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -112,7 +112,7 @@ __wt_block_open(WT_SESSION_IMPL *session, const char *filename,
/* Open the underlying file handle. */
WT_ERR(__wt_open(session, filename, 0, 0, 1, &block->fh));
- /* Initialize the live snapshot lock. */
+ /* Initialize the live checkpoint's lock. */
__wt_spin_init(session, &block->live_lock);
/*
@@ -142,7 +142,7 @@ __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block)
WT_VERBOSE_RETVAL(session, block, ret, "close");
- ret = __wt_block_snapshot_unload(session, block);
+ ret = __wt_block_checkpoint_unload(session, block);
if (block->name != NULL)
__wt_free(session, block->name);
diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c
index 0a44731d5aa..0d87232d15e 100644
--- a/src/block/block_slvg.c
+++ b/src/block/block_slvg.c
@@ -21,10 +21,10 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
WT_RET(__wt_desc_init(session, block->fh));
/*
- * Salvage creates a new snapshot when it's finished, set up for
+ * Salvage creates a new checkpoint when it's finished, set up for
* rolling an empty file forward.
*/
- WT_RET(__wt_block_snap_init(session, block, &block->live, "live", 1));
+ WT_RET(__wt_block_ckpt_init(session, block, &block->live, "live", 1));
/*
* Truncate the file to an initial sector plus N allocation size
@@ -48,9 +48,9 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
block->slvg_off = WT_BLOCK_DESC_SECTOR;
/*
- * The only snapshot extent we care about is the allocation list. Start
- * with the entire file on the allocation list, we'll "free" any blocks
- * we don't want as we process the file.
+ * The only checkpoint extent we care about is the allocation list.
+ * Start with the entire file on the allocation list, we'll "free"
+ * any blocks we don't want as we process the file.
*/
WT_RET(__wt_block_insert_ext(session, &block->live.alloc,
WT_BLOCK_DESC_SECTOR, len - WT_BLOCK_DESC_SECTOR));
@@ -70,8 +70,8 @@ __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
block->slvg = 0;
- /* Discard the snapshot. */
- return (__wt_block_snapshot_unload(session, block));
+ /* Discard the checkpoint. */
+ return (__wt_block_checkpoint_unload(session, block));
}
/*
diff --git a/src/block/block_snap.c b/src/block/block_snap.c
deleted file mode 100644
index 6f2b1fd9224..00000000000
--- a/src/block/block_snap.c
+++ /dev/null
@@ -1,706 +0,0 @@
-/*-
- * Copyright (c) 2008-2012 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-static int __snapshot_process(WT_SESSION_IMPL *, WT_BLOCK *, WT_SNAPSHOT *);
-static int __snapshot_string(
- WT_SESSION_IMPL *, WT_BLOCK *, const uint8_t *, WT_ITEM *);
-static int __snapshot_update(WT_SESSION_IMPL *,
- WT_BLOCK *, WT_SNAPSHOT *, WT_BLOCK_SNAPSHOT *, uint64_t, int);
-
-/*
- * __wt_block_snap_init --
- * Initialize a snapshot structure.
- */
-int
-__wt_block_snap_init(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_BLOCK_SNAPSHOT *si, const char *name, int is_live)
-{
- WT_DECL_RET;
-
- /*
- * If we're loading a new live snapshot, there shouldn't be one already
- * loaded. The btree engine should prevent this from ever happening,
- * but paranoia is a healthy thing.
- */
- if (is_live) {
- __wt_spin_lock(session, &block->live_lock);
- if (block->live_load)
- ret = EINVAL;
- else
- block->live_load = 1;
- __wt_spin_unlock(session, &block->live_lock);
- if (ret)
- WT_RET_MSG(session, EINVAL, "snapshot already loaded");
- }
-
- memset(si, 0, sizeof(*si));
-
- si->root_offset = WT_BLOCK_INVALID_OFFSET;
-
- WT_RET(__wt_block_extlist_init(session, &si->alloc, name, "alloc"));
- WT_RET(__wt_block_extlist_init(session, &si->avail, name, "avail"));
- WT_RET(__wt_block_extlist_init(session, &si->discard, name, "discard"));
-
- si->file_size = WT_BLOCK_DESC_SECTOR;
- WT_RET(__wt_block_extlist_init(
- session, &si->snapshot_avail, name, "snapshot_avail"));
-
- return (0);
-}
-
-/*
- * __wt_block_snapshot_load --
- * Load a snapshot.
- */
-int
-__wt_block_snapshot_load(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_ITEM *dsk, const uint8_t *addr, uint32_t addr_size,
- int readonly)
-{
- WT_BLOCK_SNAPSHOT *si;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
-
- WT_UNUSED(addr_size);
-
- /*
- * Sometimes we don't find a root page (we weren't given a snapshot,
- * or the referenced snapshot was empty). In that case we return a
- * root page size of 0. Set that up now.
- */
- dsk->size = 0;
-
- si = &block->live;
- WT_RET(__wt_block_snap_init(session, block, si, "live", 1));
-
- if (WT_VERBOSE_ISSET(session, snapshot)) {
- if (addr != NULL) {
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__snapshot_string(session, block, addr, tmp));
- }
- WT_VERBOSE_ERR(session, snapshot,
- "%s: load-snapshot: %s", block->name,
- addr == NULL ? "[Empty]" : (char *)tmp->data);
- }
-
- /* If not loading a snapshot from disk, we're done. */
- if (addr == NULL || addr_size == 0)
- return (0);
-
- /* Crack the snapshot cookie. */
- if (addr != NULL)
- WT_ERR(__wt_block_buffer_to_snapshot(session, block, addr, si));
-
- /* Verify sets up next. */
- if (block->verify)
- WT_ERR(__wt_verify_snap_load(session, block, si));
-
- /* Read, and optionally verify, any root page. */
- if (si->root_offset != WT_BLOCK_INVALID_OFFSET) {
- WT_ERR(__wt_block_read_off(session, block,
- dsk, si->root_offset, si->root_size, si->root_cksum));
- if (block->verify) {
- if (tmp == NULL) {
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__snapshot_string(
- session, block, addr, tmp));
- }
- WT_ERR(
- __wt_verify_dsk(session, (char *)tmp->data, dsk));
- }
- }
-
- /*
- * Rolling a snapshot forward requires the avail list, the blocks from
- * which we can allocate.
- */
- if (!readonly)
- WT_ERR(__wt_block_extlist_read(session, block, &si->avail));
-
- /*
- * If the snapshot can be written, that means anything written after
- * the snapshot is no longer interesting. Truncate the file.
- */
- if (!readonly) {
- WT_VERBOSE_ERR(session, snapshot,
- "truncate file to %" PRIuMAX, (uintmax_t)si->file_size);
- WT_ERR(__wt_ftruncate(session, block->fh, si->file_size));
- }
-
- if (0) {
-err: (void)__wt_block_snapshot_unload(session, block);
- }
-
- __wt_scr_free(&tmp);
- return (ret);
-}
-
-/*
- * __wt_block_snapshot_unload --
- * Unload a snapshot.
- */
-int
-__wt_block_snapshot_unload(WT_SESSION_IMPL *session, WT_BLOCK *block)
-{
- WT_BLOCK_SNAPSHOT *si;
- WT_DECL_RET;
-
- WT_VERBOSE_RETVAL(
- session, snapshot, ret, "%s: unload snapshot", block->name);
-
- si = &block->live;
-
- /* Verify cleanup. */
- if (block->verify)
- WT_TRET(__wt_verify_snap_unload(session, block, si));
-
- __wt_block_snap_destroy(session, si);
-
- block->live_load = 0;
-
- return (ret);
-}
-
-/*
- * __wt_block_snap_destroy --
- * Clear a snapshot structure.
- */
-void
-__wt_block_snap_destroy(WT_SESSION_IMPL *session, WT_BLOCK_SNAPSHOT *si)
-{
- /* Discard the extent lists. */
- __wt_block_extlist_free(session, &si->alloc);
- __wt_block_extlist_free(session, &si->avail);
- __wt_block_extlist_free(session, &si->discard);
- __wt_block_extlist_free(session, &si->snapshot_avail);
-}
-
-/*
- * __wt_block_snapshot --
- * Create a new snapshot.
- */
-int
-__wt_block_snapshot(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_ITEM *buf, WT_SNAPSHOT *snapbase)
-{
- WT_BLOCK_SNAPSHOT *si;
-
- si = &block->live;
- si->version = WT_BM_SNAPSHOT_VERSION;
-
- /*
- * Write the root page: it's possible for there to be a snapshot of
- * an empty tree, in which case, we store an illegal root offset.
- *
- * XXX
- * We happen to know that snapshots are single-threaded above us in
- * the btree engine. That's probably something we want to guarantee
- * for any WiredTiger block manager.
- */
- if (buf == NULL) {
- si->root_offset = WT_BLOCK_INVALID_OFFSET;
- si->root_size = si->root_cksum = 0;
- } else
- WT_RET(__wt_block_write_off(session, block, buf,
- &si->root_offset, &si->root_size, &si->root_cksum, 0));
-
- /* Process the list of snapshots, deleting and updating as required. */
- WT_RET(__snapshot_process(session, block, snapbase));
-
- /*
- * Snapshots have to hit disk (it would be reasonable to configure for
- * lazy snapshots, but we don't support them yet). Regardless, we're
- * not holding any locks, other writers can proceed while we wait.
- */
- if (!F_ISSET(S2C(session), WT_CONN_NOSYNC))
- WT_RET(__wt_fsync(session, block->fh));
-
- return (0);
-}
-
-/*
- * __snapshot_extlist_fblocks --
- * If an extent list was read from disk, free its space to the live avail
- * list.
- */
-static inline int
-__snapshot_extlist_fblocks(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
-{
- if (el->offset == WT_BLOCK_INVALID_OFFSET)
- return (0);
- return (__wt_block_insert_ext(
- session, &block->live.avail, el->offset, el->size));
-}
-
-/*
- * __snapshot_process --
- * Process the list of snapshots.
- */
-static int
-__snapshot_process(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_SNAPSHOT *snapbase)
-{
- WT_BLOCK_SNAPSHOT *a, *b, *si;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_SNAPSHOT *snap;
- uint64_t snapshot_size;
- int deleting, locked;
-
- si = &block->live;
- locked = 0;
-
- /*
- * We've allocated our last page, update the snapshot size. We need to
- * calculate the live system's snapshot size before reading and merging
- * snapshot allocation and discard information from the snapshots we're
- * deleting, those operations will change the underlying byte counts.
- */
- snapshot_size = si->snapshot_size;
- snapshot_size += si->alloc.bytes;
- snapshot_size -= si->discard.bytes;
-
- /*
- * Extents that become newly available as a result of deleting previous
- * snapshots are added to a list of extents. The list should be empty,
- * but there's no explicit "free the snapshot information" call into the
- * block manager; if there was an error in an upper level resulting in
- * the snapshot never being "resolved", the list might not be empty.
- *
- * XXX
- * This isn't sufficient, actually: we're going to leak all the blocks
- * that were written as part of the last snapshot because it was never
- * resolved.
- */
- __wt_block_extlist_free(session, &si->snapshot_avail);
- WT_RET(__wt_block_extlist_init(
- session, &si->snapshot_avail, "live", "snapshot_avail"));
-
- /*
- * To delete a snapshot, we'll need snapshot information for it, and we
- * have to read that from the disk.
- */
- deleting = 0;
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
- /*
- * To delete a snapshot, we'll need snapshot information for it
- * and the subsequent snapshot. The test is tricky, we have to
- * load the current snapshot's information if it's marked for
- * deletion, or if it follows a snapshot marked for deletion,
- * where the boundary cases are the first snapshot in the list
- * and the last snapshot in the list: if we're deleting the last
- * snapshot in the list, there's no next snapshot, the snapshot
- * will be merged into the live tree.
- */
- if (!F_ISSET(snap, WT_SNAP_DELETE) &&
- (snap == snapbase ||
- F_ISSET(snap, WT_SNAP_ADD) ||
- !F_ISSET(snap - 1, WT_SNAP_DELETE)))
- continue;
- deleting = 1;
-
- /*
- * Allocate a snapshot structure, crack the cookie and read the
- * snapshot's extent lists.
- *
- * Ignore the avail list: snapshot avail lists are only useful
- * if we are rolling forward from the particular snapshot and
- * they represent our best understanding of what blocks can be
- * allocated. If we are not operating on the live snapshot,
- * subsequent snapshots might have allocated those blocks, and
- * the avail list is useless. We don't discard it, because it
- * is useful as part of verification, but we don't re-write it
- * either.
- */
- WT_ERR(__wt_calloc(
- session, 1, sizeof(WT_BLOCK_SNAPSHOT), &snap->bpriv));
- si = snap->bpriv;
- WT_ERR(__wt_block_snap_init(session, block, si, snap->name, 0));
- WT_ERR(__wt_block_buffer_to_snapshot(
- session, block, snap->raw.data, si));
- WT_ERR(__wt_block_extlist_read(session, block, &si->alloc));
- WT_ERR(__wt_block_extlist_read(session, block, &si->discard));
- }
-
- /*
- * Hold a lock so the live extent lists and the file size can't change
- * underneath us. I suspect we'll tighten this if snapshots take too
- * much time away from real work: we read historic snapshot information
- * without a lock, but we could also merge and re-write the delete
- * snapshot information without a lock, except for ranges merged into
- * the live tree.
- */
- __wt_spin_lock(session, &block->live_lock);
- locked = 1;
-
- /* Skip the additional processing if we aren't deleting snapshots. */
- if (!deleting)
- goto live_update;
-
- /*
- * Delete any no-longer-needed snapshots: we do this first as it frees
- * blocks to the live lists, and the freed blocks will then be included
- * when writing the live extent lists.
- */
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
- if (!F_ISSET(snap, WT_SNAP_DELETE))
- continue;
-
- if (WT_VERBOSE_ISSET(session, snapshot)) {
- if (tmp == NULL)
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__snapshot_string(
- session, block, snap->raw.data, tmp));
- WT_VERBOSE_ERR(session, snapshot,
- "%s: delete-snapshot: %s: %s",
- block->name, snap->name, (char *)tmp->data);
- }
-
- /*
- * Set the from/to snapshot structures, where the "to" value
- * may be the live tree.
- */
- a = snap->bpriv;
- if (F_ISSET(snap + 1, WT_SNAP_ADD))
- b = &block->live;
- else
- b = (snap + 1)->bpriv;
-
- /*
- * Free the root page: there's nothing special about this free,
- * the root page is allocated using normal rules, that is, it
- * may have been taken from the avail list, and was entered on
- * the live system's alloc list at that time. We free it into
- * the snapshot's discard list, however, not the live system's
- * list because it appears on the snapshot's alloc list and so
- * must be paired in the snapshot.
- */
- if (a->root_offset != WT_BLOCK_INVALID_OFFSET)
- WT_ERR(__wt_block_insert_ext(session,
- &a->discard, a->root_offset, a->root_size));
-
- /*
- * Free the blocks used to hold the "from" snapshot's extent
- * lists directly to the live system's avail list, they were
- * never on any alloc list. Include the "from" snapshot's
- * avail list, it's going away.
- */
- WT_ERR(__snapshot_extlist_fblocks(session, block, &a->alloc));
- WT_ERR(__snapshot_extlist_fblocks(session, block, &a->avail));
- WT_ERR(__snapshot_extlist_fblocks(session, block, &a->discard));
-
- /*
- * Roll the "from" alloc and discard extent lists into the "to"
- * snapshot's lists.
- */
- if (a->alloc.entries != 0)
- WT_ERR(__wt_block_extlist_merge(
- session, &a->alloc, &b->alloc));
- if (a->discard.entries != 0)
- WT_ERR(__wt_block_extlist_merge(
- session, &a->discard, &b->discard));
-
- /*
- * If the "to" snapshot is also being deleted, we're done with
- * it, it's merged into some other snapshot in the next loop.
- * This means the extent lists may aggregate over a number of
- * snapshots, but that's OK, they're disjoint sets of ranges.
- */
- if (F_ISSET(snap + 1, WT_SNAP_DELETE))
- continue;
-
- /*
- * Find blocks for re-use: wherever the "to" snapshot's allocate
- * and discard lists overlap is fair game, move ranges appearing
- * on both lists to the live snapshot's newly available list.
- */
- WT_ERR(__wt_block_extlist_overlap(session, block, b));
-
- /*
- * If we're updating the live system's information, we're done.
- */
- if (F_ISSET(snap + 1, WT_SNAP_ADD))
- continue;
-
- /*
- * We have to write the "to" snapshot's extent lists out in new
- * blocks, and update its cookie.
- *
- * Free the blocks used to hold the "to" snapshot's extent lists
- * directly to the live system's avail list, they were never on
- * any alloc list. Do not include the "to" snapshot's avail
- * list, it's not changing.
- */
- WT_ERR(__snapshot_extlist_fblocks(session, block, &b->alloc));
- WT_ERR(__snapshot_extlist_fblocks(session, block, &b->discard));
-
- F_SET(snap + 1, WT_SNAP_UPDATE);
- }
-
- /* Update snapshots marked for update. */
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- if (F_ISSET(snap, WT_SNAP_UPDATE)) {
- WT_ASSERT(session, !F_ISSET(snap, WT_SNAP_ADD));
- WT_ERR(__snapshot_update(
- session, block, snap, snap->bpriv, 0, 0));
- }
-
-live_update:
- si = &block->live;
-
- /* Truncate the file if that's possible. */
- WT_ERR(__wt_block_extlist_truncate(session, block, &si->avail));
-
- /* Update the final, added snapshot based on the live system. */
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- if (F_ISSET(snap, WT_SNAP_ADD)) {
- WT_ERR(__snapshot_update(
- session, block, snap, si, snapshot_size, 1));
-
- /*
- * XXX
- * Our caller wants two pieces of information: the time
- * the snapshot was taken and the final snapshot size.
- * This violates layering but the alternative is a call
- * for the btree layer to crack the snapshot cookie into
- * its components, and that's a fair amount of work.
- * (We could just read the system time in the session
- * layer when updating the metadata file, but that won't
- * work for the snapshot size, and so we do both here.)
- */
- snap->snapshot_size = si->snapshot_size;
- WT_ERR(__wt_epoch(session, &snap->sec, NULL));
- }
-
- /*
- * Reset the live system's alloc and discard extent lists, leave the
- * avail list alone.
- */
- __wt_block_extlist_free(session, &si->alloc);
- WT_ERR(__wt_block_extlist_init(session, &si->alloc, "live", "alloc"));
- __wt_block_extlist_free(session, &si->discard);
- WT_ERR(
- __wt_block_extlist_init(session, &si->discard, "live", "discard"));
-
-#ifdef HAVE_DIAGNOSTIC
- /*
- * The first snapshot in the system should always have an empty discard
- * list. If we've read that snapshot and/or created it, check.
- */
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- if (!F_ISSET(snap, WT_SNAP_DELETE))
- break;
- if ((a = snap->bpriv) == NULL)
- a = &block->live;
- if (a->discard.entries != 0) {
- __wt_errx(session,
- "snapshot incorrectly has blocks on the discard list");
- WT_ERR(WT_ERROR);
- }
-#endif
-
-err: if (locked)
- __wt_spin_unlock(session, &block->live_lock);
-
- /* Discard any snapshot information we loaded, we no longer need it. */
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- if ((si = snap->bpriv) != NULL) {
- __wt_block_extlist_free(session, &si->alloc);
- __wt_block_extlist_free(session, &si->avail);
- __wt_block_extlist_free(session, &si->discard);
- }
-
- __wt_scr_free(&tmp);
- return (ret);
-}
-
-/*
- * __snapshot_update --
- * Update a snapshot.
- */
-static int
-__snapshot_update(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_SNAPSHOT *snap,
- WT_BLOCK_SNAPSHOT *si, uint64_t snapshot_size, int is_live)
-{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- uint8_t *endp;
-
-#ifdef HAVE_DIAGNOSTIC
- /* Check the extent list combinations for overlaps. */
- WT_RET(__wt_block_extlist_check(session, &si->alloc, &si->avail));
- WT_RET(__wt_block_extlist_check(session, &si->discard, &si->avail));
- WT_RET(__wt_block_extlist_check(session, &si->alloc, &si->discard));
-#endif
- /*
- * Write the snapshot's extent lists; we only write an avail list for
- * the live system, other snapshot's avail lists are static and never
- * change. When we do write the avail list for the live system it's
- * two lists: the current avail list plus the list of blocks that are
- * being made available as of the new snapshot. We can't merge that
- * second list into the real list yet, it's not truly available until
- * the new snapshot location has been saved to the metadata.
- */
- WT_RET(__wt_block_extlist_write(session, block, &si->alloc, NULL));
- if (is_live)
- WT_RET(__wt_block_extlist_write(
- session, block, &si->avail, &si->snapshot_avail));
- WT_RET(__wt_block_extlist_write(session, block, &si->discard, NULL));
-
- /*
- * Set the file size for the live system.
- *
- * XXX
- * We do NOT set the file size when re-writing snapshots because we want
- * to test the snapshot's blocks against a reasonable maximum file size
- * during verification. This is not good: imagine a snapshot appearing
- * early in the file, re-written, and then the snapshot requires blocks
- * at the end of the file, blocks after the listed file size. If the
- * application opens that snapshot for writing (discarding subsequent
- * snapshots), we would truncate the file to the early chunk, discarding
- * the re-written snapshot information. The alternative, updating the
- * file size has its own problems, in that case we'd work correctly, but
- * we'd lose all of the blocks between the original snapshot and the
- * re-written snapshot. Currently, there's no API to roll-forward
- * intermediate snapshots, if there ever is, this will need to be fixed.
- */
- if (is_live)
- WT_RET(__wt_filesize(session, block->fh, &si->file_size));
-
- /* Set the snapshot size for the live system. */
- if (is_live)
- si->snapshot_size = snapshot_size;
-
- /*
- * Copy the snapshot information into the snapshot array's address
- * cookie.
- */
- WT_RET(__wt_buf_init(session, &snap->raw, WT_BTREE_MAX_ADDR_COOKIE));
- endp = snap->raw.mem;
- WT_RET(__wt_block_snapshot_to_buffer(session, block, &endp, si));
- snap->raw.size = WT_PTRDIFF32(endp, snap->raw.mem);
-
- if (WT_VERBOSE_ISSET(session, snapshot)) {
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__snapshot_string(session, block, snap->raw.data, tmp));
- WT_VERBOSE_ERR(session, snapshot,
- "%s: create-snapshot: %s: %s",
- block->name, snap->name, (char *)tmp->data);
- }
-
-err: __wt_scr_free(&tmp);
- return (ret);
-}
-
-/*
- * __wt_block_snapshot_resolve --
- * Resolve a snapshot.
- */
-int
-__wt_block_snapshot_resolve(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_SNAPSHOT *snapbase)
-{
- WT_BLOCK_SNAPSHOT *si;
- WT_DECL_RET;
-
- si = &block->live;
-
- /*
- * Snapshots are a two-step process: first, we write a new snapshot to
- * disk (including all the new extent lists for modified snapshots and
- * the live system). As part of this we create a list of file blocks
- * newly available for re-allocation, based on snapshots being deleted.
- * We then return the locations of the new snapshot information to our
- * caller. Our caller has to write that information into some kind of
- * stable storage, and once that's done, we can actually allocate from
- * that list of newly available file blocks. (We can't allocate from
- * that list immediately because the allocation might happen before our
- * caller saves the new snapshot information, and if we crashed before
- * the new snapshot information was saved, we'd have overwritten blocks
- * still referenced by snapshots in the system.) In summary, there is
- * a second step, after our caller saves the snapshot information, we
- * are called to add the newly available blocks into the live system's
- * available list.
- */
- __wt_spin_lock(session, &block->live_lock);
- ret =
- __wt_block_extlist_merge(session, &si->snapshot_avail, &si->avail);
- __wt_spin_unlock(session, &block->live_lock);
-
- /* Discard the list. */
- __wt_block_extlist_free(session, &si->snapshot_avail);
-
- WT_UNUSED(snapbase);
- return (ret);
-}
-
-/*
- * __snapshot_string --
- * Return a printable string representation of a snapshot address cookie.
- */
-static int
-__snapshot_string(WT_SESSION_IMPL *session,
- WT_BLOCK *block, const uint8_t *addr, WT_ITEM *buf)
-{
- WT_BLOCK_SNAPSHOT *si, _si;
-
- /* Initialize the snapshot, crack the cookie. */
- si = &_si;
- WT_RET(__wt_block_snap_init(session, block, si, "string", 0));
- WT_RET(__wt_block_buffer_to_snapshot(session, block, addr, si));
-
- WT_RET(__wt_buf_fmt(session, buf,
- "version=%d",
- si->version));
- if (si->root_offset == WT_BLOCK_INVALID_OFFSET)
- WT_RET(__wt_buf_catfmt(session, buf, ", root=[Empty]"));
- else
- WT_RET(__wt_buf_catfmt(session, buf,
- ", root=[%"
- PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)si->root_offset,
- (uintmax_t)(si->root_offset + si->root_size),
- si->root_size, si->root_cksum));
- if (si->alloc.offset == WT_BLOCK_INVALID_OFFSET)
- WT_RET(__wt_buf_catfmt(session, buf, ", alloc=[Empty]"));
- else
- WT_RET(__wt_buf_catfmt(session, buf,
- ", alloc=[%"
- PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)si->alloc.offset,
- (uintmax_t)(si->alloc.offset + si->alloc.size),
- si->alloc.size, si->alloc.cksum));
- if (si->avail.offset == WT_BLOCK_INVALID_OFFSET)
- WT_RET(__wt_buf_catfmt(session, buf, ", avail=[Empty]"));
- else
- WT_RET(__wt_buf_catfmt(session, buf,
- ", avail=[%"
- PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)si->avail.offset,
- (uintmax_t)(si->avail.offset + si->avail.size),
- si->avail.size, si->avail.cksum));
- if (si->discard.offset == WT_BLOCK_INVALID_OFFSET)
- WT_RET(__wt_buf_catfmt(session, buf, ", discard=[Empty]"));
- else
- WT_RET(__wt_buf_catfmt(session, buf,
- ", discard=[%"
- PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)si->discard.offset,
- (uintmax_t)(si->discard.offset + si->discard.size),
- si->discard.size, si->discard.cksum));
- WT_RET(__wt_buf_catfmt(session, buf,
- ", file size=%" PRIuMAX
- ", write generation=%" PRIu64,
- (uintmax_t)si->file_size,
- si->write_gen));
-
- return (0);
-}
diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c
index 711e982b66d..5373e5d2b30 100644
--- a/src/block/block_vrfy.c
+++ b/src/block/block_vrfy.c
@@ -7,14 +7,14 @@
#include "wt_internal.h"
+static int __verify_ckptfrag_add(WT_SESSION_IMPL *, WT_BLOCK *, off_t, off_t);
+static int __verify_ckptfrag_chk(WT_SESSION_IMPL *, WT_BLOCK *);
static int __verify_filefrag_add(
WT_SESSION_IMPL *, WT_BLOCK *, off_t, off_t, int);
static int __verify_filefrag_chk(WT_SESSION_IMPL *, WT_BLOCK *);
-static int __verify_snapfrag_add(WT_SESSION_IMPL *, WT_BLOCK *, off_t, off_t);
-static int __verify_snapfrag_chk(WT_SESSION_IMPL *, WT_BLOCK *);
-static int __verify_start_avail(WT_SESSION_IMPL *, WT_BLOCK *, WT_SNAPSHOT *);
+static int __verify_start_avail(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
static int __verify_start_filesize(
- WT_SESSION_IMPL *, WT_BLOCK *, WT_SNAPSHOT *, off_t *);
+ WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *, off_t *);
/* The bit list ignores the first sector: convert to/from a frag/offset. */
#define WT_OFF_TO_FRAG(block, off) \
@@ -28,7 +28,7 @@ static int __verify_start_filesize(
*/
int
__wt_block_verify_start(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_SNAPSHOT *snapbase)
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
{
off_t file_size;
@@ -40,12 +40,12 @@ __wt_block_verify_start(
return (0);
/*
- * Opening a WiredTiger file truncates it back to the snapshot we are
+ * Opening a WiredTiger file truncates it back to the checkpoint we are
* rolling forward, which means it's OK if there are blocks written
- * after that snapshot, they'll be ignored. Find the largest file size
- * referenced by any snapshot.
+ * after that checkpoint, they'll be ignored. Find the largest file
+ * size referenced by any checkpoint.
*/
- WT_RET(__verify_start_filesize(session, block, snapbase, &file_size));
+ WT_RET(__verify_start_filesize(session, block, ckptbase, &file_size));
/*
* Allocate a bit array, where each bit represents a single allocation
@@ -78,16 +78,16 @@ __wt_block_verify_start(
/*
* We maintain an allocation list that is rolled forward through the
- * set of snapshots.
+ * set of checkpoints.
*/
WT_RET(__wt_block_extlist_init(
session, &block->verify_alloc, "verify", "alloc"));
/*
- * The only snapshot avail list we care about is the last one written;
+ * The only checkpoint avail list we care about is the last one written;
* get it now and initialize the list of file fragments.
*/
- WT_RET(__verify_start_avail(session, block, snapbase));
+ WT_RET(__verify_start_avail(session, block, ckptbase));
block->verify = 1;
return (0);
@@ -95,36 +95,36 @@ __wt_block_verify_start(
/*
* __verify_start_filesize --
- * Set the file size for the last snapshot.
+ * Set the file size for the last checkpoint.
*/
static int
__verify_start_filesize(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_SNAPSHOT *snapbase, off_t *file_sizep)
+ WT_BLOCK *block, WT_CKPT *ckptbase, off_t *file_sizep)
{
- WT_BLOCK_SNAPSHOT *si, _si;
- WT_SNAPSHOT *snap;
+ WT_BLOCK_CKPT *ci, _ci;
+ WT_CKPT *ckpt;
off_t file_size;
- si = &_si;
+ ci = &_ci;
/*
- * Find the largest file size referenced by any snapshot -- that should
- * be the last snapshot taken, but out of sheer, raving paranoia, look
- * through the list, future changes to snapshots might break this code
+ * Find the largest file size referenced by any checkpoint: that should
+ * be the last checkpoint taken, but out of sheer, raving paranoia, look
+ * through the list, future changes to checkpoints might break this code
* if we make that assumption.
*/
file_size = 0;
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
- WT_RET(__wt_block_buffer_to_snapshot(
- session, block, snap->raw.data, si));
- if (si->file_size > file_size)
- file_size = si->file_size;
+ WT_CKPT_FOREACH(ckptbase, ckpt) {
+ WT_RET(__wt_block_buffer_to_ckpt(
+ session, block, ckpt->raw.data, ci));
+ if (ci->file_size > file_size)
+ file_size = ci->file_size;
}
- /* Verify doesn't make any sense if we don't have a snapshot. */
+ /* Verify doesn't make any sense if we don't have a checkpoint. */
if (file_size <= WT_BLOCK_DESC_SECTOR)
WT_RET_MSG(session, WT_ERROR,
- "%s has no snapshots to verify", block->name);
+ "%s has no checkpoints to verify", block->name);
/*
* The file size should be a multiple of the allocsize, offset by the
@@ -133,7 +133,7 @@ __verify_start_filesize(WT_SESSION_IMPL *session,
file_size -= WT_BLOCK_DESC_SECTOR;
if (file_size % block->allocsize != 0)
WT_RET_MSG(session, WT_ERROR,
- "the snapshot file size is not a multiple of the "
+ "the checkpoint file size is not a multiple of the "
"allocation size");
*file_sizep = file_size;
@@ -142,32 +142,31 @@ __verify_start_filesize(WT_SESSION_IMPL *session,
/*
* __verify_start_avail --
- * Get the last snapshot's avail list and load it into the list of file
+ * Get the last checkpoint's avail list and load it into the list of file
* fragments.
*/
static int
__verify_start_avail(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_SNAPSHOT *snapbase)
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
{
- WT_BLOCK_SNAPSHOT *si, _si;
+ WT_BLOCK_CKPT *ci, _ci;
+ WT_CKPT *ckpt;
WT_DECL_RET;
WT_EXT *ext;
WT_EXTLIST *el;
- WT_SNAPSHOT *snap;
- /* Get the last on-disk snapshot, if one exists. */
- WT_SNAPSHOT_FOREACH(snapbase, snap)
+ /* Get the last on-disk checkpoint, if one exists. */
+ WT_CKPT_FOREACH(ckptbase, ckpt)
;
- if (snap == snapbase)
+ if (ckpt == ckptbase)
return (0);
- --snap;
+ --ckpt;
- si = &_si;
- WT_RET(__wt_block_snap_init(session, block, si, snap->name, 0));
- WT_ERR(
- __wt_block_buffer_to_snapshot(session, block, snap->raw.data, si));
+ ci = &_ci;
+ WT_RET(__wt_block_ckpt_init(session, block, ci, ckpt->name, 0));
+ WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
- el = &si->avail;
+ el = &ci->avail;
if (el->offset != WT_BLOCK_INVALID_OFFSET) {
WT_ERR(__wt_block_extlist_read(session, block, el));
WT_EXT_FOREACH(ext, el->off)
@@ -176,7 +175,7 @@ __verify_start_avail(
break;
}
-err: __wt_block_snap_destroy(session, si);
+err: __wt_block_ckpt_destroy(session, ci);
return (ret);
}
@@ -197,59 +196,59 @@ __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
/* Discard the fragment tracking lists. */
__wt_free(session, block->fragfile);
- __wt_free(session, block->fragsnap);
+ __wt_free(session, block->fragckpt);
block->verify = 0;
return (ret);
}
/*
- * __wt_verify_snap_load --
- * Verify work done when a snapshot is loaded.
+ * __wt_verify_ckpt_load --
+ * Verify work done when a checkpoint is loaded.
*/
int
-__wt_verify_snap_load(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_SNAPSHOT *si)
+__wt_verify_ckpt_load(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
{
WT_EXTLIST *el;
WT_EXT *ext;
uint32_t frag, frags;
- /* Set the maximum file size for this snapshot. */
- block->verify_size = si->file_size;
+ /* Set the maximum file size for this checkpoint. */
+ block->verify_size = ci->file_size;
/*
* Add the root page and disk blocks used to store the extent lists to
* the list of blocks we've "seen" from the file.
*/
- if (si->root_offset != WT_BLOCK_INVALID_OFFSET)
+ if (ci->root_offset != WT_BLOCK_INVALID_OFFSET)
WT_RET(__verify_filefrag_add(session,
- block, si->root_offset, (off_t)si->root_size, 1));
- if (si->alloc.offset != WT_BLOCK_INVALID_OFFSET)
+ block, ci->root_offset, (off_t)ci->root_size, 1));
+ if (ci->alloc.offset != WT_BLOCK_INVALID_OFFSET)
WT_RET(__verify_filefrag_add(session,
- block, si->alloc.offset, (off_t)si->alloc.size, 1));
- if (si->avail.offset != WT_BLOCK_INVALID_OFFSET)
+ block, ci->alloc.offset, (off_t)ci->alloc.size, 1));
+ if (ci->avail.offset != WT_BLOCK_INVALID_OFFSET)
WT_RET(__verify_filefrag_add(session,
- block, si->avail.offset, (off_t)si->avail.size, 1));
- if (si->discard.offset != WT_BLOCK_INVALID_OFFSET)
+ block, ci->avail.offset, (off_t)ci->avail.size, 1));
+ if (ci->discard.offset != WT_BLOCK_INVALID_OFFSET)
WT_RET(__verify_filefrag_add(session,
- block, si->discard.offset, (off_t)si->discard.size, 1));
+ block, ci->discard.offset, (off_t)ci->discard.size, 1));
/*
- * Snapshot verification is similar to deleting snapshots. As we read
- * each new snapshot, we merge the allocation lists (accumulating all
- * allocated pages as we move through the system), and then remove any
- * pages found in the discard list. The result should be a one-to-one
- * mapping to the pages we find in this particular snapshot.
+ * Checkpoint verification is similar to deleting checkpoints. As we
+ * read each new checkpoint, we merge the allocation lists (accumulating
+ * all allocated pages as we move through the system), and then remove
+ * any pages found in the discard list. The result should be a
+ * one-to-one mapping to the pages we find in this specific checkpoint.
*/
- el = &si->alloc;
+ el = &ci->alloc;
if (el->offset != WT_BLOCK_INVALID_OFFSET) {
WT_RET(__wt_block_extlist_read(session, block, el));
WT_RET(__wt_block_extlist_merge(
session, el, &block->verify_alloc));
__wt_block_extlist_free(session, el);
}
- el = &si->discard;
+ el = &ci->discard;
if (el->offset != WT_BLOCK_INVALID_OFFSET) {
WT_RET(__wt_block_extlist_read(session, block, el));
WT_EXT_FOREACH(ext, el->off)
@@ -259,50 +258,51 @@ __wt_verify_snap_load(
}
/*
- * The root page of the snapshot appears on the alloc list, but not, at
- * least until the snapshot is deleted, on a discard list. To handle
- * this case, remove the root page from the accumulated list of snapshot
- * pages, so it doesn't add a new requirement for subsequent snapshots.
+ * The root page of the checkpoint appears on the alloc list, but not,
+ * at least until the checkpoint is deleted, on a discard list. To
+ * handle this case, remove the root page from the accumulated list of
+ * checkpoint pages, so it doesn't add a new requirement for subsequent
+ * checkpoints.
*/
- if (si->root_offset != WT_BLOCK_INVALID_OFFSET)
+ if (ci->root_offset != WT_BLOCK_INVALID_OFFSET)
WT_RET(__wt_block_off_remove_overlap(session,
- &block->verify_alloc, si->root_offset, si->root_size));
+ &block->verify_alloc, ci->root_offset, ci->root_size));
/*
- * Allocate the per-snapshot bit map. The per-snapshot bit map is the
- * opposite of the per-file bit map, that is, we set all the bits that
- * we expect to be set based on the snapshot's allocation and discard
- * lists, then clear bits as we verify blocks. When finished verifying
- * the snapshot, the bit list should be empty.
+ * Allocate the per-checkpoint bit map. The per-checkpoint bit map is
+ * the opposite of the per-file bit map, that is, we set all the bits
+ * that we expect to be set based on the checkpoint's allocation and
+ * discard lists, then clear bits as we verify blocks. When finished
+ * verifying the checkpoint, the bit list should be empty.
*/
- WT_RET(__bit_alloc(session, block->frags, &block->fragsnap));
+ WT_RET(__bit_alloc(session, block->frags, &block->fragckpt));
el = &block->verify_alloc;
WT_EXT_FOREACH(ext, el->off) {
frag = (uint32_t)WT_OFF_TO_FRAG(block, ext->off);
frags = (uint32_t)(ext->size / block->allocsize);
- __bit_nset(block->fragsnap, frag, frag + (frags - 1));
+ __bit_nset(block->fragckpt, frag, frag + (frags - 1));
}
return (0);
}
/*
- * __wt_verify_snap_unload --
- * Verify work done when a snapshot is unloaded.
+ * __wt_verify_ckpt_unload --
+ * Verify work done when a checkpoint is unloaded.
*/
int
-__wt_verify_snap_unload(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_SNAPSHOT *si)
+__wt_verify_ckpt_unload(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
{
WT_DECL_RET;
- WT_UNUSED(si);
+ WT_UNUSED(ci);
- /* Confirm we verified every snapshot block. */
- ret = __verify_snapfrag_chk(session, block);
+ /* Confirm we verified every checkpoint block. */
+ ret = __verify_ckptfrag_chk(session, block);
- /* Discard the per-snapshot fragment list. */
- __wt_free(session, block->fragsnap);
+ /* Discard the per-checkpoint fragment list. */
+ __wt_free(session, block->fragckpt);
return (ret);
}
@@ -354,7 +354,7 @@ err: __wt_scr_free(&tmp);
/*
* __wt_block_verify_addr --
- * Update an address in a snapshot as verified.
+ * Update an address in a checkpoint as verified.
*/
int
__wt_block_verify_addr(WT_SESSION_IMPL *session,
@@ -374,18 +374,19 @@ __wt_block_verify_addr(WT_SESSION_IMPL *session,
/*
* It's tempting to try and flag a page as "verified" when we read it.
* That doesn't work because we may visit a page multiple times when
- * verifying a single snapshot (for example, when verifying the physical
- * image of a row-store leaf page with overflow keys, the overflow keys
- * are read when checking for key sort issues, and read again when more
- * general overflow item checking is done). This function is called by
- * the btree verification code, once per logical visit in a snapshot, so
- * we can detect if a page is referenced multiple times within a single
- * snapshot. This doesn't apply to the per-file list, because it is
- * expected for the same btree blocks to appear in multiple snapshots.
+ * verifying a single checkpoint (for example, when verifying the
+ * physical image of a row-store leaf page with overflow keys, the
+ * overflow keys are read when checking for key sort issues, and read
+ * again when more general overflow item checking is done). This
+ * function is called by the btree verification code, once per logical
+ * visit in a checkpoint, so we can detect if a page is referenced
+ * multiple times within a single checkpoint. This doesn't apply to
+ * the per-file list, because it is expected for the same btree blocks
+ * to appear in multiple checkpoints.
*
- * Add the block to the per-snapshot list.
+ * Add the block to the per-checkpoint list.
*/
- WT_RET(__verify_snapfrag_add(session, block, offset, size));
+ WT_RET(__verify_ckptfrag_add(session, block, offset, size));
return (0);
}
@@ -457,7 +458,7 @@ __verify_filefrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
}
__wt_errx(session,
- "file range %" PRIuMAX "-%" PRIuMAX " was never verified",
+ "file range %" PRIuMAX "-%" PRIuMAX " never verified",
(uintmax_t)WT_FRAG_TO_OFF(block, first),
(uintmax_t)WT_FRAG_TO_OFF(block, last));
ret = WT_ERROR;
@@ -466,28 +467,28 @@ __verify_filefrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
}
/*
- * __verify_snapfrag_add --
- * Clear the fragments in the per-snapshot fragment list, and complain if
- * we've already verified this chunk of the snapshot.
+ * __verify_ckptfrag_add --
+ * Clear the fragments in the per-checkpoint fragment list, and complain if
+ * we've already verified this chunk of the checkpoint.
*/
static int
-__verify_snapfrag_add(
+__verify_ckptfrag_add(
WT_SESSION_IMPL *session, WT_BLOCK *block, off_t offset, off_t size)
{
uint32_t f, frag, frags, i;
WT_VERBOSE_RET(session, verify,
- "adding snapshot block at %" PRIuMAX "-%" PRIuMAX " (%" PRIuMAX ")",
+ "add checkpoint block at %" PRIuMAX "-%" PRIuMAX " (%" PRIuMAX ")",
(uintmax_t)offset, (uintmax_t)(offset + size), (uintmax_t)size);
/*
- * Check each chunk against the snapshot's size, a snapshot should never
- * reference a block outside of the snapshot's stored size.
+ * Check each chunk against the checkpoint's size, a checkpoint should
+ * never reference a block outside of the checkpoint's stored size.
*/
if (offset + size > block->verify_size)
WT_RET_MSG(session, WT_ERROR,
"fragment %" PRIuMAX "-%" PRIuMAX " references "
- "file blocks outside the snapshot",
+ "file blocks outside the checkpoint",
(uintmax_t)offset, (uintmax_t)(offset + size));
frag = (uint32_t)WT_OFF_TO_FRAG(block, offset);
@@ -495,49 +496,48 @@ __verify_snapfrag_add(
/* It is illegal to reference a particular chunk more than once. */
for (f = frag, i = 0; i < frags; ++f, ++i)
- if (!__bit_test(block->fragsnap, f))
+ if (!__bit_test(block->fragckpt, f))
WT_RET_MSG(session, WT_ERROR,
- "snapshot fragment at %" PRIuMAX " referenced "
- "multiple times in a single snapshot or found in "
- "the snapshot but not listed in the snapshot's "
+ "checkpoint fragment at %" PRIuMAX " referenced "
+ "multiple times in a single checkpoint or found in "
+ "the checkpoint but not listed in the checkpoint's "
"allocation list",
(uintmax_t)offset);
- /* Remove fragments from the snapshot's allocation list. */
- __bit_nclr(block->fragsnap, frag, frag + (frags - 1));
+ /* Remove fragments from the checkpoint's allocation list. */
+ __bit_nclr(block->fragckpt, frag, frag + (frags - 1));
return (0);
}
/*
- * __verify_snapfrag_chk --
- * Verify we've checked all the fragments in the snapshot.
+ * __verify_ckptfrag_chk --
+ * Verify we've checked all the fragments in the checkpoint.
*/
static int
-__verify_snapfrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
+__verify_ckptfrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
WT_DECL_RET;
uint32_t first, last;
/*
- * Check for snapshot fragments we haven't verified -- every time we
+ * Check for checkpoint fragments we haven't verified -- every time we
* find a bit that's set, complain. We re-start the search each time
* after clearing the set bit(s) we found: it's simpler and this isn't
* supposed to happen a lot.
*/
for (;;) {
- if (__bit_ffs(block->fragsnap, block->frags, &first) != 0)
+ if (__bit_ffs(block->fragckpt, block->frags, &first) != 0)
break;
- __bit_clear(block->fragsnap, first);
+ __bit_clear(block->fragckpt, first);
for (last = first + 1; last < block->frags; ++last) {
- if (!__bit_test(block->fragsnap, last))
+ if (!__bit_test(block->fragckpt, last))
break;
- __bit_clear(block->fragsnap, last);
+ __bit_clear(block->fragckpt, last);
}
__wt_errx(session,
- "snapshot range %" PRIuMAX "-%" PRIuMAX " was never "
- "verified",
+ "checkpoint range %" PRIuMAX "-%" PRIuMAX " never verified",
(uintmax_t)WT_FRAG_TO_OFF(block, first),
(uintmax_t)WT_FRAG_TO_OFF(block, last));
ret = WT_ERROR;
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 504a1ab18d9..0d99f02245b 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -225,7 +225,7 @@ not_compressed: /*
/*
* Allocate space from the underlying file and write the block. Always
- * extend the file when writing snapshot extents, that's easier than
+ * extend the file when writing checkpoint extents, that's easier than
* distinguishing between extents allocated from the live avail list,
* and those which can't be allocated from the live avail list such as
* blocks for writing the live avail list itself.
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index a040d9f8065..9c238cd1da6 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -74,19 +74,20 @@ __wt_btree_open(WT_SESSION_IMPL *session,
session, filename, btree->config, cfg, forced_salvage));
/*
- * Open the specified snapshot unless it's a special command (special
- * commands are responsible for loading their own snapshots, if any).
+ * Open the specified checkpoint unless it's a special command (special
+ * commands are responsible for loading their own checkpoints, if any).
*/
if (F_ISSET(btree,
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
return (0);
/*
- * There are two reasons to load an empty tree rather than a snapshot:
- * either there is no snapshot (the file is being created), or the load
- * call returns no root page (the snapshot is empty).
+ * There are two reasons to load an empty tree rather than a checkpoint:
+ * either there is no checkpoint (the file is being created), or the
+ * load call returns no root page (the checkpoint is for an empty file).
*/
- WT_ERR(__wt_bm_snapshot_load(session, &dsk, addr, addr_size, readonly));
+ WT_ERR(
+ __wt_bm_checkpoint_load(session, &dsk, addr, addr_size, readonly));
if (addr == NULL || addr_size == 0 || dsk.size == 0)
WT_ERR(__btree_tree_open_empty(session));
else {
@@ -117,14 +118,11 @@ __wt_btree_close(WT_SESSION_IMPL *session)
btree = session->btree;
- /*
- * Discard the tree and, if the tree is modified, create a new snapshot
- * for the underlying object, unless it's a special command.
- */
+ /* Unload the checkpoint, unless it's a special command. */
if (F_ISSET(btree, WT_BTREE_OPEN) &&
!F_ISSET(btree,
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
- WT_TRET(__wt_bm_snapshot_unload(session));
+ WT_TRET(__wt_bm_checkpoint_unload(session));
/* Close the underlying block manager reference. */
WT_TRET(__wt_bm_close(session));
@@ -132,9 +130,9 @@ __wt_btree_close(WT_SESSION_IMPL *session)
/* Close the Huffman tree. */
__wt_btree_huffman_close(session);
- /* Snapshot lock. */
- if (btree->snaplock != NULL)
- __wt_rwlock_destroy(session, &btree->snaplock);
+ /* Checkpoint lock. */
+ if (btree->ckptlock != NULL)
+ __wt_rwlock_destroy(session, &btree->ckptlock);
/* Free allocated memory. */
__wt_free(session, btree->key_format);
@@ -212,8 +210,9 @@ __btree_conf(WT_SESSION_IMPL *session)
}
}
- /* Snapshot lock. */
- WT_RET(__wt_rwlock_alloc(session, "btree snapshot", &btree->snaplock));
+ /* Checkpoint lock. */
+ WT_RET(
+ __wt_rwlock_alloc(session, "btree checkpoint", &btree->ckptlock));
/* Page sizes */
WT_RET(__btree_page_sizes(session, config));
@@ -327,8 +326,8 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session)
/*
* Mark the child page empty so that if it is evicted, the tree ends up
- * sane. The page should not be dirty, or we will always write empty
- * trees on close, including empty snapshots.
+ * sane. The page should not be dirty, else we would write empty trees
+ * on close, including empty checkpoints.
*/
WT_ERR(__wt_page_modify_init(session, leaf));
F_SET(leaf->modify, WT_PM_REC_EMPTY);
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index f7559c9d200..7bec54b86e3 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -137,7 +137,7 @@ static int __slvg_trk_ovfl(WT_SESSION_IMPL *,
*/
int
__wt_bt_salvage(
- WT_SESSION_IMPL *session, WT_SNAPSHOT *snapbase, const char *cfg[])
+ WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -270,12 +270,12 @@ __wt_bt_salvage(
/*
* Step 9:
- * Evict the newly created root page, creating a snapshot.
+ * Evict the newly created root page, creating a checkpoint.
*/
if (ss->root_page != NULL) {
- btree->snap = snapbase;
+ btree->ckpt = ckptbase;
ret = __wt_rec_evict(session, ss->root_page, WT_REC_SINGLE);
- btree->snap = NULL;
+ btree->ckpt = NULL;
ss->root_page = NULL;
}
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 7d37d30c155..ab391458451 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -13,7 +13,7 @@
*/
int
__wt_bt_cache_flush(
- WT_SESSION_IMPL *session, WT_SNAPSHOT *snapbase, int op, int force)
+ WT_SESSION_IMPL *session, WT_CKPT *ckptbase, int op, int force)
{
WT_DECL_RET;
WT_BTREE *btree;
@@ -21,7 +21,7 @@ __wt_bt_cache_flush(
btree = session->btree;
/*
- * If we need a new snapshot, mark the root page dirty to ensure a
+ * If we need a new checkpoint, mark the root page dirty to ensure a
* write.
*/
if (force) {
@@ -51,9 +51,9 @@ __wt_bt_cache_flush(
* already works that way. None of these problems can't be fixed, but
* I don't see a reason to change at this time, either.
*/
- btree->snap = snapbase;
+ btree->ckpt = ckptbase;
ret = __wt_sync_file_serial(session, op);
- btree->snap = NULL;
+ btree->ckpt = NULL;
WT_RET(ret);
switch (op) {
diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c
index 929e9269651..60ecdd7e3fa 100644
--- a/src/btree/bt_vrfy.c
+++ b/src/btree/bt_vrfy.c
@@ -25,6 +25,7 @@ typedef struct {
WT_ITEM *tmp2; /* Temporary buffer */
} WT_VSTUFF;
+static void __verify_checkpoint_reset(WT_VSTUFF *);
static int __verify_int(WT_SESSION_IMPL *, int);
static int __verify_overflow(
WT_SESSION_IMPL *, const uint8_t *, uint32_t, WT_VSTUFF *);
@@ -33,7 +34,6 @@ static int __verify_row_int_key_order(
WT_SESSION_IMPL *, WT_PAGE *, WT_REF *, uint32_t, WT_VSTUFF *);
static int __verify_row_leaf_key_order(
WT_SESSION_IMPL *, WT_PAGE *, WT_VSTUFF *);
-static void __verify_snapshot_reset(WT_VSTUFF *);
static int __verify_tree(WT_SESSION_IMPL *, WT_PAGE *, uint64_t, WT_VSTUFF *);
/*
@@ -79,13 +79,13 @@ static int
__verify_int(WT_SESSION_IMPL *session, int dumpfile)
{
WT_BTREE *btree;
+ WT_CKPT *ckptbase, *ckpt;
WT_DECL_RET;
WT_ITEM dsk;
- WT_SNAPSHOT *snapbase, *snap;
WT_VSTUFF *vs, _vstuff;
btree = session->btree;
- snapbase = NULL;
+ ckptbase = NULL;
WT_CLEAR(_vstuff);
vs = &_vstuff;
@@ -95,22 +95,22 @@ __verify_int(WT_SESSION_IMPL *session, int dumpfile)
WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp1));
WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp2));
- /* Get a list of the snapshots for this file. */
- WT_ERR(__wt_meta_snaplist_get(session, btree->name, &snapbase));
+ /* Get a list of the checkpoints for this file. */
+ WT_ERR(__wt_meta_ckptlist_get(session, btree->name, &ckptbase));
/* Inform the underlying block manager we're verifying. */
- WT_ERR(__wt_bm_verify_start(session, snapbase));
+ WT_ERR(__wt_bm_verify_start(session, ckptbase));
- /* Loop through the file's snapshots, verifying each one. */
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
+ /* Loop through the file's checkpoints, verifying each one. */
+ WT_CKPT_FOREACH(ckptbase, ckpt) {
WT_VERBOSE_ERR(session, verify,
- "%s: snapshot %s", btree->name, snap->name);
+ "%s: checkpoint %s", btree->name, ckpt->name);
- /* House-keeping between snapshots. */
- __verify_snapshot_reset(vs);
+ /* House-keeping between checkpoints. */
+ __verify_checkpoint_reset(vs);
/*
- * Load the snapshot -- if the size of the root page is 0, the
+ * Load the checkpoint -- if the size of the root page is 0, the
* file is empty.
*
* Clearing the root page reference here is not an error: any
@@ -120,10 +120,10 @@ __verify_int(WT_SESSION_IMPL *session, int dumpfile)
* we can't ever use it again.
*/
WT_CLEAR(dsk);
- WT_ERR(__wt_bm_snapshot_load(
- session, &dsk, snap->raw.data, snap->raw.size, 1));
+ WT_ERR(__wt_bm_checkpoint_load(
+ session, &dsk, ckpt->raw.data, ckpt->raw.size, 1));
if (dsk.size != 0) {
- /* Verify, then discard the snapshot from the cache. */
+ /* Verify then discard the checkpoint from the cache. */
if ((ret = __wt_btree_tree_open(session, &dsk)) == 0) {
ret = __verify_tree(
session, btree->root_page, (uint64_t)1, vs);
@@ -132,13 +132,13 @@ __verify_int(WT_SESSION_IMPL *session, int dumpfile)
}
}
- /* Unload the snapshot. */
- WT_TRET(__wt_bm_snapshot_unload(session));
+ /* Unload the checkpoint. */
+ WT_TRET(__wt_bm_checkpoint_unload(session));
WT_ERR(ret);
}
- /* Discard the list of snapshots. */
-err: __wt_meta_snaplist_free(session, snapbase);
+ /* Discard the list of checkpoints. */
+err: __wt_meta_ckptlist_free(session, ckptbase);
/* Inform the underlying block manager we're done. */
WT_TRET(__wt_bm_verify_end(session));
@@ -158,19 +158,19 @@ err: __wt_meta_snaplist_free(session, snapbase);
}
/*
- * __verify_snapshot_reset --
- * Reset anything needing to be reset for each new snapshot verification.
+ * __verify_checkpoint_reset --
+ * Reset anything needing to be reset for each new checkpoint verification.
*/
static void
-__verify_snapshot_reset(WT_VSTUFF *vs)
+__verify_checkpoint_reset(WT_VSTUFF *vs)
{
/*
- * Key order is per snapshot, reset the data length that serves as a
+ * Key order is per checkpoint, reset the data length that serves as a
* flag value.
*/
vs->max_addr->size = 0;
- /* Record total is per snapshot, reset the record count. */
+ /* Record total is per checkpoint, reset the record count. */
vs->record_total = 0;
}
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 633bd7e04db..58c13a9a282 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -305,7 +305,7 @@ __wt_rec_write(
/*
* Root pages are trickier. First, if the page is empty or we performed
* a 1-for-1 page swap, we're done, we've written the root (and done the
- * snapshot).
+ * checkpoint).
*/
switch (F_ISSET(page->modify, WT_PM_REC_MASK)) {
case WT_PM_REC_EMPTY: /* Page is empty */
@@ -337,10 +337,10 @@ __wt_rec_write(
* root page, pointing to a chain of pages, each of which are flagged as
* "split" pages, up to a final replacement page. We don't use those
* pages again, they are discarded in the next root page reconciliation.
- * We could discard them immediately (because the snapshot is complete,
- * any pages we discard go on the next snapshot's free list, it's safe
- * to do), but the code is simpler this way, and this operation should
- * not be common.
+ * We could discard them immediately (as the checkpoint is complete, any
+ * pages we discard go on the next checkpoint's free list, it's safe to
+ * do), but the code is simpler this way, and this operation should not
+ * be common.
*/
WT_VERBOSE_RET(session, reconcile,
"root page split %p -> %p", page, page->modify->u.split);
@@ -822,7 +822,7 @@ __rec_split_finish(WT_SESSION_IMPL *session)
WT_BOUNDARY *bnd;
WT_PAGE_HEADER *dsk;
WT_RECONCILE *r;
- int snapshot;
+ int checkpoint;
r = session->reconcile;
@@ -861,18 +861,18 @@ __rec_split_finish(WT_SESSION_IMPL *session)
}
/*
- * Third, check to see if we're creating a snapshot: any time we write
+ * Third, check to see if we're creating a checkpoint: any time we write
* the root page of the tree, we tell the underlying block manager so it
- * can write and return the additional information a snapshot requires.
+ * can write and return any additional information checkpoints require.
*/
- snapshot = r->bnd_next == 1 && WT_PAGE_IS_ROOT(r->page);
+ checkpoint = r->bnd_next == 1 && WT_PAGE_IS_ROOT(r->page);
/* Finalize the header information and write the page. */
dsk = r->dsk.mem;
dsk->recno = bnd->recno;
dsk->u.entries = r->entries;
r->dsk.size = WT_PTRDIFF32(r->first_free, dsk);
- return (__rec_split_write(session, bnd, &r->dsk, snapshot));
+ return (__rec_split_write(session, bnd, &r->dsk, checkpoint));
}
/*
@@ -960,7 +960,7 @@ err: __wt_scr_free(&tmp);
*/
static int
__rec_split_write(
- WT_SESSION_IMPL *session, WT_BOUNDARY *bnd, WT_ITEM *buf, int snapshot)
+ WT_SESSION_IMPL *session, WT_BOUNDARY *bnd, WT_ITEM *buf, int checkpoint)
{
WT_CELL *cell;
WT_PAGE_HEADER *dsk;
@@ -992,16 +992,16 @@ __rec_split_write(
/*
* Write the chunk and save the location information. There is one big
- * question: if this is a snapshot, then we're going to have to wrap up
+ * question: if this is a checkpoint, we're going to have to wrap up
* our tracking information (freeing blocks we no longer need) before we
- * can create the snapshot, because snapshots write extent lists, that
- * is, the whole system has to be consistent. We have to handle empty
- * tree snapshots elsewhere (because we don't write anything for empty
- * tree snapshots, they don't come through this path). Given that fact,
- * clear the boundary information as a reminder, and do the snapshot at
- * a later time, during wrapup.
+ * can create the checkpoint, because checkpoints may write additional
+ * information. We have to handle empty tree checkpoints elsewhere
+ * (because we don't write anything for empty tree checkpoints, they
+ * don't come through this path). Given that fact, clear the boundary
+ * information as a reminder, and do the checkpoint at a later time,
+ * during wrapup.
*/
- if (snapshot) {
+ if (checkpoint) {
bnd->addr.addr = NULL;
bnd->addr.size = 0;
} else {
@@ -2866,7 +2866,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* Root page split: the last entry on the list. There
* won't be a page to discard because writing the page
- * created a snapshot, not a replacement page.
+ * created a checkpoint, not a replacement page.
*/
WT_ASSERT(session, mod->u.replace.addr == NULL);
break;
@@ -2906,7 +2906,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
* address blocks (if any).
*
* The exception is root pages are never tracked or free'd, they
- * are snapshots, and must be explicitly dropped.
+ * are checkpoints, and must be explicitly dropped.
*/
if (!WT_PAGE_IS_ROOT(page) && page->ref->addr != NULL) {
__wt_get_addr(page->parent, page->ref, &addr, &size);
@@ -2921,7 +2921,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
* Discard the replacement leaf page's blocks.
*
* The exception is root pages are never tracked or free'd, they
- * are snapshots, and must be explicitly dropped.
+ * are checkpoints, and must be explicitly dropped.
*/
if (!WT_PAGE_IS_ROOT(page))
WT_RET(__wt_rec_track(session, page,
@@ -2952,10 +2952,10 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* Wrap up discarded block and overflow tracking. If we are about to
- * create a snapshot, the system must be entirely consistent at that
+ * create a checkpoint, the system must be entirely consistent at that
* point, the underlying block manager is presumably going to do some
* action to resolve the list of allocated/free/whatever blocks that
- * are associated with the snapshot.
+ * are associated with the checkpoint.
*/
WT_RET(__wt_rec_track_wrapup(session, page));
@@ -2966,7 +2966,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
/* If this is the root page, we need to create a sync point. */
if (WT_PAGE_IS_ROOT(page))
- WT_RET(__wt_bm_snapshot(session, NULL, btree->snap));
+ WT_RET(__wt_bm_checkpoint(session, NULL, btree->ckpt));
/*
* If the page was empty, we want to discard it from the tree
@@ -2989,7 +2989,8 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
*/
bnd = &r->bnd[0];
if (bnd->addr.addr == NULL)
- WT_RET(__wt_bm_snapshot(session, &r->dsk, btree->snap));
+ WT_RET(
+ __wt_bm_checkpoint(session, &r->dsk, btree->ckpt));
else {
mod->u.replace = bnd->addr;
bnd->addr.addr = NULL;
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 34c22c0ce43..88d1388eaa5 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -76,23 +76,24 @@ __wt_confchk_cursor_close =
const char *
__wt_confdfl_file_meta =
- "allocation_size=512B,block_compressor="",checksum=true,collator="","
- "columns=(),huffman_key="",huffman_value="",internal_item_max=0,"
- "internal_key_truncate=true,internal_page_max=2KB,key_format=u,key_gap=10"
- ",leaf_item_max=0,leaf_page_max=1MB,prefix_compression=true,snapshot="","
- "split_pct=75,type=btree,value_format=u,version=(major=0,minor=0)";
+ "allocation_size=512B,block_compressor="",checkpoint="",checksum=true,"
+ "collator="",columns=(),huffman_key="",huffman_value="","
+ "internal_item_max=0,internal_key_truncate=true,internal_page_max=2KB,"
+ "key_format=u,key_gap=10,leaf_item_max=0,leaf_page_max=1MB,"
+ "prefix_compression=true,split_pct=75,type=btree,value_format=u,"
+ "version=(major=0,minor=0)";
const char *
__wt_confchk_file_meta =
"allocation_size=(type=int,min=512B,max=128MB),block_compressor=(),"
- "checksum=(type=boolean),collator=(),columns=(type=list),huffman_key=(),"
- "huffman_value=(),internal_item_max=(type=int,min=0),"
+ "checkpoint=(),checksum=(type=boolean),collator=(),columns=(type=list),"
+ "huffman_key=(),huffman_value=(),internal_item_max=(type=int,min=0),"
"internal_key_truncate=(type=boolean),internal_page_max=(type=int,"
"min=512B,max=512MB),key_format=(type=format),key_gap=(type=int,min=0),"
"leaf_item_max=(type=int,min=0),leaf_page_max=(type=int,min=512B,"
- "max=512MB),prefix_compression=(type=boolean),snapshot=(),"
- "split_pct=(type=int,min=25,max=100),type=(choices=[\"btree\"]),"
- "value_format=(type=format),version=()";
+ "max=512MB),prefix_compression=(type=boolean),split_pct=(type=int,min=25,"
+ "max=100),type=(choices=[\"btree\"]),value_format=(type=format),"
+ "version=()";
const char *
__wt_confdfl_index_meta =
@@ -271,6 +272,6 @@ __wt_confchk_wiredtiger_open =
"multiprocess=(type=boolean),session_max=(type=int,min=1),"
"sync=(type=boolean),transactional=(type=boolean),"
"use_environment_priv=(type=boolean),verbose=(type=list,"
- "choices=[\"block\",\"evict\",\"evictserver\",\"fileops\",\"hazard\","
- "\"mutex\",\"read\",\"readserver\",\"reconcile\",\"salvage\",\"snapshot\""
- ",\"verify\",\"write\"])";
+ "choices=[\"block\",\"ckpt\",\"evict\",\"evictserver\",\"fileops\","
+ "\"hazard\",\"mutex\",\"read\",\"readserver\",\"reconcile\",\"salvage\","
+ "\"verify\",\"write\"])";
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 2680c995477..63fe1ef68ce 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -705,6 +705,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
uint32_t flag;
} *ft, verbtypes[] = {
{ "block", WT_VERB_block },
+ { "ckpt", WT_VERB_ckpt },
{ "evict", WT_VERB_evict },
{ "evictserver",WT_VERB_evictserver },
{ "fileops", WT_VERB_fileops },
@@ -715,7 +716,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
{ "reconcile", WT_VERB_reconcile },
{ "salvage", WT_VERB_salvage },
{ "verify", WT_VERB_verify },
- { "snapshot", WT_VERB_snapshot },
{ "write", WT_VERB_write },
{ NULL, 0 }
}, directio_types[] = {
diff --git a/src/conn/conn_btree.c b/src/conn/conn_btree.c
index 236858a9c9f..0951bd635b9 100644
--- a/src/conn/conn_btree.c
+++ b/src/conn/conn_btree.c
@@ -89,9 +89,9 @@ __conn_btree_get(WT_SESSION_IMPL *session,
__wt_spin_lock(session, &conn->spinlock);
TAILQ_FOREACH(btree, &conn->btqh, q) {
if (strcmp(name, btree->name) == 0 &&
- ((ckpt == NULL && btree->ckpt == NULL) ||
- (ckpt != NULL && btree->ckpt != NULL &&
- strcmp(ckpt, btree->ckpt) == 0))) {
+ ((ckpt == NULL && btree->checkpoint == NULL) ||
+ (ckpt != NULL && btree->checkpoint != NULL &&
+ strcmp(ckpt, btree->checkpoint) == 0))) {
++btree->refcnt;
session->btree = btree;
matched = 1;
@@ -114,7 +114,7 @@ __conn_btree_get(WT_SESSION_IMPL *session,
session, "btree handle", &btree->rwlock)) == 0 &&
(ret = __wt_strdup(session, name, &btree->name)) == 0 &&
(ckpt == NULL ||
- (ret = __wt_strdup(session, ckpt, &btree->ckpt)) == 0)) {
+ (ret = __wt_strdup(session, ckpt, &btree->checkpoint)) == 0)) {
/* Lock the handle before it is inserted in the list. */
__wt_writelock(session, btree->rwlock);
F_SET(btree, WT_BTREE_EXCLUSIVE);
@@ -132,7 +132,7 @@ __conn_btree_get(WT_SESSION_IMPL *session,
if (btree->rwlock != NULL)
__wt_rwlock_destroy(session, &btree->rwlock);
__wt_free(session, btree->name);
- __wt_free(session, btree->ckpt);
+ __wt_free(session, btree->checkpoint);
__wt_overwrite_and_free(session, btree);
}
@@ -156,7 +156,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session)
if (!F_ISSET(btree,
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
- ret = __wt_snapshot(session, NULL);
+ ret = __wt_checkpoint(session, NULL);
WT_TRET(__wt_btree_close(session));
@@ -207,10 +207,10 @@ __wt_conn_btree_open(WT_SESSION_IMPL *session,
F_SET(btree, WT_BTREE_NO_EVICTION);
do {
- WT_ERR(__wt_meta_snapshot_get(
- session, btree->name, btree->ckpt, addr));
+ WT_ERR(__wt_meta_checkpoint_get(
+ session, btree->name, btree->checkpoint, addr));
WT_ERR(__wt_btree_open(session, addr->data, addr->size, cfg,
- btree->ckpt == NULL ? 0 : 1));
+ btree->checkpoint == NULL ? 0 : 1));
F_SET(btree, WT_BTREE_OPEN);
/* Drop back to a readlock if that is all that was needed. */
@@ -299,7 +299,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
__wt_spin_lock(session, &conn->spinlock);
TAILQ_FOREACH(btree, &conn->btqh, q)
if (F_ISSET(btree, WT_BTREE_OPEN) &&
- btree->ckpt == NULL &&
+ btree->checkpoint == NULL &&
strcmp(btree->name, WT_METADATA_URI) != 0) {
/*
* We have the connection spinlock, which prevents
@@ -453,7 +453,7 @@ __conn_btree_discard(WT_SESSION_IMPL *session, WT_BTREE *btree)
__wt_rwlock_destroy(session, &btree->rwlock);
__wt_free(session, btree->config);
__wt_free(session, btree->name);
- __wt_free(session, btree->ckpt);
+ __wt_free(session, btree->checkpoint);
__wt_overwrite_and_free(session, btree);
return (ret);
diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c
index 293c95d6467..c6ec0e486a5 100644
--- a/src/cursor/cur_std.c
+++ b/src/cursor/cur_std.c
@@ -426,7 +426,7 @@ __wt_cursor_init(WT_CURSOR *cursor,
if (cval.val != 0)
F_SET(cursor, WT_CURSTD_RAW);
- /* Snapshot cursors are read-only. */
+ /* Checkpoint cursors are read-only. */
WT_RET(__wt_config_gets(session, cfg, "checkpoint", &cval));
if (cval.len != 0) {
cursor->insert = (int (*)(WT_CURSOR *))__wt_cursor_notsup;
diff --git a/src/docs/checkpoints.dox b/src/docs/checkpoints.dox
index 07b4dfc3900..ac89c0991b3 100644
--- a/src/docs/checkpoints.dox
+++ b/src/docs/checkpoints.dox
@@ -39,6 +39,6 @@ discards any other checkpoint with the same name (unless they are
currently open in a cursor).
Unnamed checkpoints managed by WiredTiger are given the name
-"WiredTigerInternal".
+"WiredTigerInternalCheckpoint".
*/
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index 7c4f5480cc0..111726693e1 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -30,7 +30,7 @@ URIs
Vv
WiredTiger
WiredTiger's
-WiredTigerInternal
+WiredTigerInternalCheckpoint
aR
ack'ed
alloc
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index ca5a408d640..f8b05b7ac07 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -1,6 +1,6 @@
/*! @page upgrading Upgrading WiredTiger applications
-@section version13 Version 1.3
+@section version13api Version 1.3 API changes
The checkpoint functionality supported by WT_SESSION::checkpoint and the
snapshot functionality supported by WT_SESSION::sync have been merged
@@ -34,4 +34,9 @@ from the command line.
The \c -s options to the \c dump and \c list commands for the \c wt
command line utility have been renamed to be \c -c.
+@section version13file Version 1.3 file format changes
+
+The underlying file formats changed in the 1.3 release; tables and files
+should be dumped and re-loaded into a new database.
+
*/
diff --git a/src/include/api.h b/src/include/api.h
index 3e2483961ce..46e1db3f62f 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -340,16 +340,16 @@ extern WT_PROCESS __wt_process;
#define WT_SESSION_INTERNAL 0x00000002
#define WT_SESSION_SALVAGE_QUIET_ERR 0x00000001
#define WT_VERB_block 0x00001000
-#define WT_VERB_evict 0x00000800
-#define WT_VERB_evictserver 0x00000400
-#define WT_VERB_fileops 0x00000200
-#define WT_VERB_hazard 0x00000100
-#define WT_VERB_mutex 0x00000080
-#define WT_VERB_read 0x00000040
-#define WT_VERB_readserver 0x00000020
-#define WT_VERB_reconcile 0x00000010
-#define WT_VERB_salvage 0x00000008
-#define WT_VERB_snapshot 0x00000004
+#define WT_VERB_ckpt 0x00000800
+#define WT_VERB_evict 0x00000400
+#define WT_VERB_evictserver 0x00000200
+#define WT_VERB_fileops 0x00000100
+#define WT_VERB_hazard 0x00000080
+#define WT_VERB_mutex 0x00000040
+#define WT_VERB_read 0x00000020
+#define WT_VERB_readserver 0x00000010
+#define WT_VERB_reconcile 0x00000008
+#define WT_VERB_salvage 0x00000004
#define WT_VERB_verify 0x00000002
#define WT_VERB_write 0x00000001
/*
diff --git a/src/include/block.h b/src/include/block.h
index 26dde46b715..0dffb8f46a4 100644
--- a/src/include/block.h
+++ b/src/include/block.h
@@ -17,10 +17,10 @@
#define WT_BLOCK_INVALID_OFFSET 0
/*
- * The block manager maintains three per-snapshot extent lists:
- * alloc: the extents allocated in this snapshot
+ * The block manager maintains three per-checkpoint extent lists:
+ * alloc: the extents allocated in this checkpoint
* avail: the extents available for allocation
- * discard: the extents freed in this snapshot
+ * discard: the extents freed in this checkpoint
* Each of the extent lists is based on two skiplists: first, a by-offset list
* linking WT_EXT elements and sorted by file offset (low-to-high), second, a
* by-size list linking WT_SIZE elements and sorted by chunk size (low-to-high).
@@ -60,7 +60,7 @@ struct __wt_extlist {
/*
* WT_EXT --
* Encapsulation of an extent, either allocated or freed within the
- * snapshot.
+ * checkpoint.
*/
struct __wt_ext {
off_t off; /* Extent's file offset */
@@ -106,16 +106,16 @@ struct __wt_size {
(skip) != NULL; (skip) = (skip)->next[(skip)->depth])
/*
- * Snapshot cookie: carries a version number as I don't want to rev the schema
- * file version should the default block manager snapshot format change.
+ * Checkpoint cookie: carries a version number as I don't want to rev the schema
+ * file version should the default block manager checkpoint format change.
*
- * Version #1 snapshot cookie format:
+ * Version #1 checkpoint cookie format:
* [1] [root addr] [alloc addr] [avail addr] [discard addr]
- * [file size] [snapshot size] [write generation]
+ * [file size] [checkpoint size] [write generation]
*/
-#define WT_BM_SNAPSHOT_VERSION 1 /* Snapshot format version */
+#define WT_BM_CHECKPOINT_VERSION 1 /* Checkpoint format version */
#define WT_BLOCK_EXTLIST_MAGIC 71002 /* Identify a list */
-struct __wt_block_snapshot {
+struct __wt_block_ckpt {
uint8_t version; /* Version */
off_t root_offset; /* The root */
@@ -125,9 +125,9 @@ struct __wt_block_snapshot {
WT_EXTLIST avail; /* Extents available */
WT_EXTLIST discard; /* Extents discarded */
- off_t file_size; /* Snapshot file size */
- uint64_t snapshot_size; /* Snapshot byte count */
- WT_EXTLIST snapshot_avail; /* Snapshot free'd extents */
+ off_t file_size; /* Checkpoint file size */
+ uint64_t ckpt_size; /* Checkpoint byte count */
+ WT_EXTLIST ckpt_avail; /* Checkpoint free'd extents */
uint64_t write_gen; /* Write generation */
};
@@ -144,9 +144,9 @@ struct __wt_block {
uint32_t allocsize; /* Allocation size */
int checksum; /* If checksums configured */
- WT_SPINLOCK live_lock; /* Lock to protect the live snapshot */
- WT_BLOCK_SNAPSHOT live; /* Live snapshot */
- int live_load; /* Live snapshot loaded */
+ WT_SPINLOCK live_lock; /* Live checkpoint lock */
+ WT_BLOCK_CKPT live; /* Live checkpoint */
+ int live_load; /* Live checkpoint loaded */
WT_COMPRESSOR *compressor; /* Page compressor */
@@ -156,11 +156,11 @@ struct __wt_block {
/* Verification support */
int verify; /* If performing verification */
- off_t verify_size; /* Snapshot's file size */
+ off_t verify_size; /* Checkpoint's file size */
WT_EXTLIST verify_alloc; /* Verification allocation list */
uint32_t frags; /* Maximum frags in the file */
uint8_t *fragfile; /* Per-file frag tracking list */
- uint8_t *fragsnap; /* Per-snapshot frag tracking list */
+ uint8_t *fragckpt; /* Per-checkpoint frag tracking list */
};
/*
diff --git a/src/include/btree.h b/src/include/btree.h
index e713107e10e..50ca84dc194 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -68,15 +68,13 @@ struct __wt_btree {
uint32_t refcnt; /* Sessions using this tree. */
TAILQ_ENTRY(__wt_btree) q; /* Linked list of handles */
- volatile uint32_t lru_count; /* Count of threads in LRU eviction. */
-
const char *name; /* Object name as a URI */
+ const char *checkpoint; /* Checkpoint name (or NULL) */
const char *config; /* Configuration string */
- const char *ckpt; /* Checkpoint name (or NULL) */
/* XXX Should move into the session-level handle information. */
- WT_RWLOCK *snaplock; /* Lock for snapshot creation */
- WT_SNAPSHOT *snap; /* Snapshot information */
+ WT_RWLOCK *ckptlock; /* Lock for checkpoint creation */
+ WT_CKPT *ckpt; /* Checkpoint information */
enum { BTREE_COL_FIX=1, /* Fixed-length column store */
BTREE_COL_VAR=2, /* Variable-length column store */
@@ -112,6 +110,7 @@ struct __wt_btree {
u_int block_header; /* Block manager header length */
WT_PAGE *evict_page; /* Eviction thread's location */
+ volatile uint32_t lru_count; /* Count of threads in LRU eviction. */
WT_BTREE_STATS *stats; /* Btree statistics */
diff --git a/src/include/extern.h b/src/include/extern.h
index 323dbbcf712..51307283fcc 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -19,14 +19,35 @@ extern int __wt_block_addr_string(WT_SESSION_IMPL *session,
WT_ITEM *buf,
const uint8_t *addr,
uint32_t addr_size);
-extern int __wt_block_buffer_to_snapshot(WT_SESSION_IMPL *session,
+extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session,
WT_BLOCK *block,
const uint8_t *p,
- WT_BLOCK_SNAPSHOT *si);
-extern int __wt_block_snapshot_to_buffer(WT_SESSION_IMPL *session,
+ WT_BLOCK_CKPT *ci);
+extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session,
WT_BLOCK *block,
uint8_t **pp,
- WT_BLOCK_SNAPSHOT *si);
+ WT_BLOCK_CKPT *ci);
+extern int __wt_block_ckpt_init(WT_SESSION_IMPL *session,
+ WT_BLOCK *block,
+ WT_BLOCK_CKPT *ci,
+ const char *name,
+ int is_live);
+extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session,
+ WT_BLOCK *block,
+ WT_ITEM *dsk,
+ const uint8_t *addr,
+ uint32_t addr_size,
+ int readonly);
+extern int __wt_block_checkpoint_unload(WT_SESSION_IMPL *session,
+ WT_BLOCK *block);
+extern void __wt_block_ckpt_destroy(WT_SESSION_IMPL *session,
+ WT_BLOCK_CKPT *ci);
+extern int __wt_block_checkpoint(WT_SESSION_IMPL *session,
+ WT_BLOCK *block,
+ WT_ITEM *buf,
+ WT_CKPT *ckptbase);
+extern int __wt_block_checkpoint_resolve(WT_SESSION_IMPL *session,
+ WT_BLOCK *block);
extern uint32_t __wt_cksum(const void *chunk, size_t len);
extern int __wt_block_off_remove_overlap( WT_SESSION_IMPL *session,
WT_EXTLIST *el,
@@ -53,7 +74,7 @@ extern int __wt_block_extlist_check( WT_SESSION_IMPL *session,
WT_EXTLIST *bl);
extern int __wt_block_extlist_overlap( WT_SESSION_IMPL *session,
WT_BLOCK *block,
- WT_BLOCK_SNAPSHOT *si);
+ WT_BLOCK_CKPT *ci);
extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session,
WT_EXTLIST *a,
WT_EXTLIST *b);
@@ -97,17 +118,16 @@ extern int __wt_bm_open(WT_SESSION_IMPL *session,
const char *cfg[],
int forced_salvage);
extern int __wt_bm_close(WT_SESSION_IMPL *session);
-extern int __wt_bm_snapshot(WT_SESSION_IMPL *session,
+extern int __wt_bm_checkpoint(WT_SESSION_IMPL *session,
WT_ITEM *buf,
- WT_SNAPSHOT *snapbase);
-extern int __wt_bm_snapshot_resolve(WT_SESSION_IMPL *session,
- WT_SNAPSHOT *snapbase);
-extern int __wt_bm_snapshot_load(WT_SESSION_IMPL *session,
+ WT_CKPT *ckptbase);
+extern int __wt_bm_checkpoint_resolve(WT_SESSION_IMPL *session);
+extern int __wt_bm_checkpoint_load(WT_SESSION_IMPL *session,
WT_ITEM *buf,
const uint8_t *addr,
uint32_t addr_size,
int readonly);
-extern int __wt_bm_snapshot_unload(WT_SESSION_IMPL *session);
+extern int __wt_bm_checkpoint_unload(WT_SESSION_IMPL *session);
extern int __wt_bm_truncate(WT_SESSION_IMPL *session, const char *filename);
extern int __wt_bm_free(WT_SESSION_IMPL *session,
const uint8_t *addr,
@@ -130,8 +150,7 @@ extern int __wt_bm_salvage_next(WT_SESSION_IMPL *session,
uint64_t *write_genp,
int *eofp);
extern int __wt_bm_salvage_end(WT_SESSION_IMPL *session);
-extern int __wt_bm_verify_start(WT_SESSION_IMPL *session,
- WT_SNAPSHOT *snapbase);
+extern int __wt_bm_verify_start(WT_SESSION_IMPL *session, WT_CKPT *ckptbase);
extern int __wt_bm_verify_end(WT_SESSION_IMPL *session);
extern int __wt_bm_verify_addr(WT_SESSION_IMPL *session,
const uint8_t *addr,
@@ -167,38 +186,16 @@ extern int __wt_block_salvage_next( WT_SESSION_IMPL *session,
uint32_t *addr_sizep,
uint64_t *write_genp,
int *eofp);
-extern int __wt_block_snap_init(WT_SESSION_IMPL *session,
- WT_BLOCK *block,
- WT_BLOCK_SNAPSHOT *si,
- const char *name,
- int is_live);
-extern int __wt_block_snapshot_load(WT_SESSION_IMPL *session,
- WT_BLOCK *block,
- WT_ITEM *dsk,
- const uint8_t *addr,
- uint32_t addr_size,
- int readonly);
-extern int __wt_block_snapshot_unload(WT_SESSION_IMPL *session,
- WT_BLOCK *block);
-extern void __wt_block_snap_destroy(WT_SESSION_IMPL *session,
- WT_BLOCK_SNAPSHOT *si);
-extern int __wt_block_snapshot(WT_SESSION_IMPL *session,
- WT_BLOCK *block,
- WT_ITEM *buf,
- WT_SNAPSHOT *snapbase);
-extern int __wt_block_snapshot_resolve( WT_SESSION_IMPL *session,
- WT_BLOCK *block,
- WT_SNAPSHOT *snapbase);
extern int __wt_block_verify_start( WT_SESSION_IMPL *session,
WT_BLOCK *block,
- WT_SNAPSHOT *snapbase);
+ WT_CKPT *ckptbase);
extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block);
-extern int __wt_verify_snap_load( WT_SESSION_IMPL *session,
+extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session,
WT_BLOCK *block,
- WT_BLOCK_SNAPSHOT *si);
-extern int __wt_verify_snap_unload( WT_SESSION_IMPL *session,
+ WT_BLOCK_CKPT *ci);
+extern int __wt_verify_ckpt_unload( WT_SESSION_IMPL *session,
WT_BLOCK *block,
- WT_BLOCK_SNAPSHOT *si);
+ WT_BLOCK_CKPT *ci);
extern int __wt_block_verify(WT_SESSION_IMPL *session,
WT_BLOCK *block,
WT_ITEM *buf,
@@ -322,11 +319,11 @@ extern int __wt_kv_return(WT_SESSION_IMPL *session,
WT_CURSOR_BTREE *cbt,
int key_ret);
extern int __wt_bt_salvage( WT_SESSION_IMPL *session,
- WT_SNAPSHOT *snapbase,
+ WT_CKPT *ckptbase,
const char *cfg[]);
extern int __wt_btree_stat_init(WT_SESSION_IMPL *session);
extern int __wt_bt_cache_flush( WT_SESSION_IMPL *session,
- WT_SNAPSHOT *snapbase,
+ WT_CKPT *ckptbase,
int op,
int force);
extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]);
@@ -616,29 +613,29 @@ extern WT_LOGREC_DESC __wt_logdesc_debug;
extern int __wt_metadata_get(WT_SESSION *session,
const char *uri,
const char **valuep);
-extern int __wt_metadata_get_snaplist( WT_SESSION *session,
+extern int __wt_metadata_get_ckptlist( WT_SESSION *session,
const char *name,
- WT_SNAPSHOT **snapbasep);
-extern void __wt_metadata_free_snaplist(WT_SESSION *session,
- WT_SNAPSHOT *snapbase);
+ WT_CKPT **ckptbasep);
+extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase);
extern int __wt_meta_btree_apply(WT_SESSION_IMPL *session,
int (*func)(WT_SESSION_IMPL *,
const char *[]),
const char *cfg[],
uint32_t flags);
-extern int __wt_meta_snapshot_get(WT_SESSION_IMPL *session,
+extern int __wt_meta_checkpoint_get(WT_SESSION_IMPL *session,
const char *name,
- const char *snapshot,
+ const char *checkpoint,
WT_ITEM *addr);
-extern int __wt_meta_snapshot_clear(WT_SESSION_IMPL *session, const char *name);
-extern int __wt_meta_snaplist_get( WT_SESSION_IMPL *session,
+extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session,
+ const char *name);
+extern int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session,
const char *name,
- WT_SNAPSHOT **snapbasep);
-extern int __wt_meta_snaplist_set( WT_SESSION_IMPL *session,
+ WT_CKPT **ckptbasep);
+extern int __wt_meta_ckptlist_set( WT_SESSION_IMPL *session,
const char *name,
- WT_SNAPSHOT *snapbase);
-extern void __wt_meta_snaplist_free(WT_SESSION_IMPL *session,
- WT_SNAPSHOT *snapbase);
+ WT_CKPT *ckptbase);
+extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session,
+ WT_CKPT *ckptbase);
extern int __wt_metadata_open(WT_SESSION_IMPL *session);
extern int __wt_metadata_cursor( WT_SESSION_IMPL *session,
const char *config,
@@ -902,8 +899,8 @@ extern int __wt_session_get_btree(WT_SESSION_IMPL *session,
const char *uri,
const char *cfg[],
uint32_t flags);
-extern int __wt_session_lock_snapshot( WT_SESSION_IMPL *session,
- const char *snapshot,
+extern int __wt_session_lock_checkpoint( WT_SESSION_IMPL *session,
+ const char *checkpoint,
uint32_t flags);
extern int __wt_session_discard_btree( WT_SESSION_IMPL *session,
WT_BTREE_SESSION *btree_session);
@@ -1048,11 +1045,12 @@ extern void __wt_stat_clear_connection_stats(WT_STATS *stats_arg);
extern int __wt_txnid_cmp(const void *v1, const void *v2);
extern int __wt_txn_get_snapshot(WT_SESSION_IMPL *session, wt_txnid_t max_id);
extern int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_txn_release(WT_SESSION_IMPL *session);
extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]);
-extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_init(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_CONNECTION_IMPL *conn, const char *cfg[]);
extern void __wt_txn_global_destroy(WT_CONNECTION_IMPL *conn);
-extern int __wt_snapshot(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
diff --git a/src/include/meta.h b/src/include/meta.h
index 7e696cf0aef..ecef945c379 100644
--- a/src/include/meta.h
+++ b/src/include/meta.h
@@ -14,30 +14,30 @@
#define WT_METADATA_VERSION_STR "WiredTiger version string"
/*
- * WT_SNAPSHOT --
- * Encapsulation of snapshot information, shared by the metadata, the
+ * WT_CKPT --
+ * Encapsulation of checkpoint information, shared by the metadata, the
* btree engine, and the block manager.
*/
-#define WT_INTERNAL_SNAPSHOT "WiredTigerInternalSnapshot"
-#define WT_SNAPSHOT_FOREACH(snapbase, snap) \
- for ((snap) = (snapbase); (snap)->name != NULL; ++(snap))
+#define WT_INTERNAL_CHKPT "WiredTigerInternalCheckpoint"
+#define WT_CKPT_FOREACH(ckptbase, ckpt) \
+ for ((ckpt) = (ckptbase); (ckpt)->name != NULL; ++(ckpt))
-struct __wt_snapshot {
+struct __wt_ckpt {
char *name; /* Name or NULL */
- WT_ITEM addr; /* Snapshot cookie string */
- WT_ITEM raw; /* Snapshot cookie raw */
+ WT_ITEM addr; /* Checkpoint cookie string */
+ WT_ITEM raw; /* Checkpoint cookie raw */
- int64_t order; /* Snapshot order */
+ int64_t order; /* Checkpoint order */
uintmax_t sec; /* Timestamp */
- uint64_t snapshot_size; /* Snapshot size */
+ uint64_t ckpt_size; /* Checkpoint size */
void *bpriv; /* Block manager private */
-#define WT_SNAP_ADD 0x01 /* Snapshot to be added */
-#define WT_SNAP_DELETE 0x02 /* Snapshot to be deleted */
-#define WT_SNAP_UPDATE 0x04 /* Snapshot requires update */
+#define WT_CKPT_ADD 0x01 /* Checkpoint to be added */
+#define WT_CKPT_DELETE 0x02 /* Checkpoint to be deleted */
+#define WT_CKPT_UPDATE 0x04 /* Checkpoint requires update */
uint32_t flags;
};
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 676e0dd31d9..2027a47a0aa 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -711,7 +711,7 @@ struct __wt_session {
/*! Upgrade a file or table.
*
- * Upgrade upgrades a file, or the files of which a table is comprised.
+ * Upgrade upgrades a file or table, if upgrade is required.
*
* @snippet ex_all.c session upgrade
*
@@ -1058,10 +1058,10 @@ struct __wt_connection {
* information.,a boolean flag; default \c false.}
* @config{verbose, enable messages for various events. Options are given as a
* list\, such as <code>"verbose=[evictserver\,read]"</code>.,a list\, with
- * values chosen from the following options: \c "block"\, \c "evict"\, \c
- * "evictserver"\, \c "fileops"\, \c "hazard"\, \c "mutex"\, \c "read"\, \c
- * "readserver"\, \c "reconcile"\, \c "salvage"\, \c "snapshot"\, \c "verify"\,
- * \c "write"; default empty.}
+ * values chosen from the following options: \c "block"\, \c "ckpt"\, \c
+ * "evict"\, \c "evictserver"\, \c "fileops"\, \c "hazard"\, \c "mutex"\, \c
+ * "read"\, \c "readserver"\, \c "reconcile"\, \c "salvage"\, \c "verify"\, \c
+ * "write"; default empty.}
* @configend
* Additionally, if a file named \c WiredTiger.config appears in the WiredTiger
* home directory, it is read for configuration values (see @ref config_file
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index 81c6fe7b053..ffc619f5d9b 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -50,12 +50,12 @@ struct __wt_addr;
typedef struct __wt_addr WT_ADDR;
struct __wt_block;
typedef struct __wt_block WT_BLOCK;
+struct __wt_block_ckpt;
+ typedef struct __wt_block_ckpt WT_BLOCK_CKPT;
struct __wt_block_desc;
typedef struct __wt_block_desc WT_BLOCK_DESC;
struct __wt_block_header;
typedef struct __wt_block_header WT_BLOCK_HEADER;
-struct __wt_block_snapshot;
- typedef struct __wt_block_snapshot WT_BLOCK_SNAPSHOT;
struct __wt_btree;
typedef struct __wt_btree WT_BTREE;
struct __wt_btree_session;
@@ -68,6 +68,8 @@ struct __wt_cell;
typedef struct __wt_cell WT_CELL;
struct __wt_cell_unpack;
typedef struct __wt_cell_unpack WT_CELL_UNPACK;
+struct __wt_ckpt;
+ typedef struct __wt_ckpt WT_CKPT;
struct __wt_col;
typedef struct __wt_col WT_COL;
struct __wt_col_rle;
@@ -144,8 +146,6 @@ struct __wt_session_impl;
typedef struct __wt_session_impl WT_SESSION_IMPL;
struct __wt_size;
typedef struct __wt_size WT_SIZE;
-struct __wt_snapshot;
- typedef struct __wt_snapshot WT_SNAPSHOT;
struct __wt_stats;
typedef struct __wt_stats WT_STATS;
struct __wt_table;
diff --git a/src/meta/meta_api.c b/src/meta/meta_api.c
index 74ea9ea91e5..dee40585f90 100644
--- a/src/meta/meta_api.c
+++ b/src/meta/meta_api.c
@@ -18,23 +18,23 @@ __wt_metadata_get(WT_SESSION *session, const char *uri, const char **valuep)
}
/*
- * __wt_snaplist_get --
- * Public entry point to __wt_meta_snaplist_get (for wt list).
+ * __wt_metadata_get_ckptlist --
+ * Public entry point to __wt_meta_ckptlist_get (for wt list).
*/
int
-__wt_metadata_get_snaplist(
- WT_SESSION *session, const char *name, WT_SNAPSHOT **snapbasep)
+__wt_metadata_get_ckptlist(
+ WT_SESSION *session, const char *name, WT_CKPT **ckptbasep)
{
- return (__wt_meta_snaplist_get(
- (WT_SESSION_IMPL *)session, name, snapbasep));
+ return (__wt_meta_ckptlist_get(
+ (WT_SESSION_IMPL *)session, name, ckptbasep));
}
/*
- * __wt_snaplist_free --
- * Public entry point to __wt_snapshot_list_free (for wt list).
+ * __wt_metadata_free_ckptlist --
+ * Public entry point to __wt_meta_ckptlist_free (for wt list).
*/
void
-__wt_metadata_free_snaplist(WT_SESSION *session, WT_SNAPSHOT *snapbase)
+__wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase)
{
- __wt_meta_snaplist_free((WT_SESSION_IMPL *)session, snapbase);
+ __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, ckptbase);
}
diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c
new file mode 100644
index 00000000000..da40ab5742e
--- /dev/null
+++ b/src/meta/meta_ckpt.c
@@ -0,0 +1,411 @@
+/*-
+ * Copyright (c) 2008-2012 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+static int __ckpt_get(
+ WT_SESSION_IMPL *, const char *, const char *, WT_ITEM *);
+static int __ckpt_get_last(WT_SESSION_IMPL *, const char *, WT_ITEM *);
+static int __ckpt_get_name(
+ WT_SESSION_IMPL *, const char *, const char *, WT_ITEM *);
+static int __ckpt_set(WT_SESSION_IMPL *, const char *, const char *);
+static int __ckpt_version_chk(WT_SESSION_IMPL *, const char *, const char *);
+
+/*
+ * __wt_meta_checkpoint_get --
+ *	Return a file's checkpoint address.
+ *
+ *	The name argument is the file's metadata key; checkpoint is the name
+ *	of the checkpoint to look up, or NULL for the file's last checkpoint.
+ *
+ *	Returns 0 with addr set to the checkpoint's raw cookie on success.
+ *	A missing named checkpoint fails with WT_NOTFOUND (and a message).
+ *	A missing default checkpoint is treated as file creation: 0 is
+ *	returned with addr cleared (NULL data, zero size) and the caller is
+ *	expected to handle the "no data" case.  Any other lookup failure is
+ *	returned to the caller unchanged.
+ */
+int
+__wt_meta_checkpoint_get(WT_SESSION_IMPL *session,
+    const char *name, const char *checkpoint, WT_ITEM *addr)
+{
+	WT_DECL_RET;
+
+	/* Get the checkpoint address. */
+	ret = __ckpt_get(session, name, checkpoint, addr);
+
+	/*
+	 * Success, or a failure other than "not found" (for example, the
+	 * metadata read or the version check failing): hand the result back
+	 * as-is so a real error is never reported as a valid address.
+	 */
+	if (ret != WT_NOTFOUND)
+		return (ret);
+
+	/*
+	 * If we don't find a named checkpoint, we're done, they're read-only
+	 * and there's nothing to create.
+	 */
+	if (checkpoint != NULL)
+		WT_RET_MSG(session, WT_NOTFOUND,
+		    "no \"%s\" checkpoint found in %s",
+		    checkpoint, name);
+
+	/*
+	 * If the caller didn't give us a specific checkpoint name, we assume
+	 * it's a creation and there isn't a checkpoint to find: return "no
+	 * data" and let the caller deal with it.
+	 */
+	addr->data = NULL;
+	addr->size = 0;
+	return (0);
+}
+
+/*
+ * __wt_meta_checkpoint_clear --
+ *	Discard the checkpoint information recorded for a file.
+ */
+int
+__wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *name)
+{
+	WT_DECL_RET;
+
+	/*
+	 * A missing metadata entry isn't an error when clearing: if we are
+	 * unrolling a failed create the entry may already have been removed,
+	 * in which case there is nothing left to update.
+	 */
+	if ((ret = __ckpt_set(session, name, NULL)) == WT_NOTFOUND)
+		return (0);
+	return (ret);
+}
+
+/*
+ * __ckpt_get --
+ *	Look up a file's checkpoint cookie: the named checkpoint if a name
+ *	is supplied, otherwise the file's last checkpoint.
+ */
+static int
+__ckpt_get(WT_SESSION_IMPL *session,
+    const char *name, const char *checkpoint, WT_ITEM *addr)
+{
+	WT_DECL_RET;
+	const char *config;
+
+	config = NULL;
+
+	/* Read the file's metadata entry and check its version numbers. */
+	WT_ERR(__wt_metadata_read(session, name, &config));
+	WT_ERR(__ckpt_version_chk(session, name, config));
+
+	/*
+	 * The lookup is the final step, its result is the function's result
+	 * whether it succeeded or not.
+	 */
+	ret = checkpoint == NULL ?
+	    __ckpt_get_last(session, config, addr) :
+	    __ckpt_get_name(session, checkpoint, config, addr);
+
+err:	__wt_free(session, config);
+	return (ret);
+}
+
+/*
+ * __ckpt_set --
+ *	Store a new checkpoint entry in a file's metadata; a NULL value
+ *	stores an empty checkpoint entry.
+ */
+static int
+__ckpt_set(WT_SESSION_IMPL *session, const char *name, const char *v)
+{
+	WT_DECL_RET;
+	const char *cfg[3], *current, *merged;
+
+	current = NULL;
+	merged = NULL;
+
+	/* Start from the metadata currently stored for the file. */
+	WT_ERR(__wt_metadata_read(session, name, &current));
+
+	/*
+	 * Collapse the stored configuration with the replacement checkpoint
+	 * entry, then write the result back.
+	 */
+	cfg[0] = current;
+	if (v == NULL)
+		cfg[1] = "checkpoint=()";
+	else
+		cfg[1] = v;
+	cfg[2] = NULL;
+	WT_ERR(__wt_config_collapse(session, cfg, &merged));
+	ret = __wt_metadata_update(session, name, merged);
+
+err:	__wt_free(session, current);
+	__wt_free(session, merged);
+	return (ret);
+}
+
+/*
+ * __ckpt_get_name --
+ *	Find the named checkpoint in a file's configuration and return its
+ *	cookie, converted from hex to raw; WT_NOTFOUND if no checkpoint has
+ *	that name.
+ */
+static int
+__ckpt_get_name(WT_SESSION_IMPL *session,
+    const char *name, const char *config, WT_ITEM *addr)
+{
+	WT_CONFIG ckpts;
+	WT_CONFIG_ITEM cookie, key, value;
+
+	WT_RET(__wt_config_getones(session, config, "checkpoint", &value));
+	WT_RET(__wt_config_subinit(session, &ckpts, &value));
+
+	while (__wt_config_next(&ckpts, &key, &value) == 0) {
+		/* Keys aren't nul-terminated, compare length then bytes. */
+		if (strlen(name) != key.len ||
+		    strncmp(name, key.str, key.len) != 0)
+			continue;
+
+		WT_RET(__wt_config_subgets(session, &value, "addr", &cookie));
+		return (__wt_nhex_to_raw(
+		    session, cookie.str, cookie.len, addr));
+	}
+	return (WT_NOTFOUND);
+}
+
+/*
+ * __ckpt_get_last --
+ *	Return the cookie of the checkpoint with the highest order value in
+ *	a file's configuration; WT_NOTFOUND if there are no checkpoints.
+ */
+static int
+__ckpt_get_last(
+    WT_SESSION_IMPL *session, const char *config, WT_ITEM *addr)
+{
+	WT_CONFIG ckpts;
+	WT_CONFIG_ITEM item, key, value;
+	int64_t last_order;
+
+	WT_RET(__wt_config_getones(session, config, "checkpoint", &value));
+	WT_RET(__wt_config_subinit(session, &ckpts, &value));
+
+	/* Zero means "nothing found yet". */
+	last_order = 0;
+	while (__wt_config_next(&ckpts, &key, &value) == 0) {
+		/* Once we have a candidate, skip anything ordered before it. */
+		if (last_order != 0) {
+			WT_RET(__wt_config_subgets(
+			    session, &value, "order", &item));
+			if (item.val < last_order)
+				continue;
+		}
+
+		/* A checkpoint without an address is invalid. */
+		WT_RET(__wt_config_subgets(session, &value, "addr", &item));
+		if (item.len == 0)
+			WT_RET(EINVAL);
+
+		/* Our caller wants the raw cookie, not the hex. */
+		WT_RET(__wt_nhex_to_raw(session, item.str, item.len, addr));
+
+		/* Remember this checkpoint's order as the one to beat. */
+		WT_RET(__wt_config_subgets(session, &value, "order", &item));
+		last_order = item.val;
+	}
+
+	if (last_order == 0)
+		return (WT_NOTFOUND);
+	return (0);
+}
+
+/*
+ * __ckpt_compare_order --
+ *	Qsort comparison routine for the checkpoint list.
+ *
+ *	Compares two WT_CKPT elements by their order field, returning a
+ *	negative value, zero or a positive value when the first sorts
+ *	before, equal to or after the second.
+ */
+static int
+__ckpt_compare_order(const void *a, const void *b)
+{
+	const WT_CKPT *ackpt, *bckpt;
+
+	ackpt = a;
+	bckpt = b;
+
+	/*
+	 * Equal orders must compare equal: a comparator that never returns
+	 * zero gives an answer that depends on the argument order, and qsort
+	 * requires a consistent ordering.
+	 */
+	if (ackpt->order == bckpt->order)
+		return (0);
+	return (ackpt->order > bckpt->order ? 1 : -1);
+}
+
+/*
+ * __wt_meta_ckptlist_get --
+ * Load all available checkpoint information for a file.
+ */
+int
+__wt_meta_ckptlist_get(
+ WT_SESSION_IMPL *session, const char *name, WT_CKPT **ckptbasep)
+{
+ WT_CKPT *ckpt, *ckptbase;
+ WT_CONFIG ckptconf;
+ WT_CONFIG_ITEM a, k, v;
+ WT_DECL_RET;
+ WT_ITEM *buf;
+ size_t allocated, slot;
+ const char *config;
+ char timebuf[64];
+
+ *ckptbasep = NULL;
+
+ buf = NULL;
+ ckptbase = NULL;
+ allocated = slot = 0;
+ config = NULL;
+
+ /* Retrieve the metadata information for the file. */
+ WT_RET(__wt_metadata_read(session, name, &config));
+
+ /* Load any existing checkpoints into the array. */
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ if (__wt_config_getones(session, config, "checkpoint", &v) == 0 &&
+ __wt_config_subinit(session, &ckptconf, &v) == 0)
+ for (; __wt_config_next(&ckptconf, &k, &v) == 0; ++slot) {
+ if (slot * sizeof(WT_CKPT) == allocated)
+ WT_ERR(__wt_realloc(session, &allocated,
+ (slot + 50) * sizeof(WT_CKPT), &ckptbase));
+ ckpt = &ckptbase[slot];
+
+ /*
+ * Copy the name, address (raw and hex), order and time
+ * into the slot.
+ */
+ WT_ERR(
+ __wt_strndup(session, k.str, k.len, &ckpt->name));
+
+ WT_ERR(__wt_config_subgets(session, &v, "addr", &a));
+ if (a.len == 0)
+ goto format;
+ WT_ERR(__wt_buf_set(
+ session, &ckpt->addr, a.str, a.len));
+ WT_ERR(__wt_nhex_to_raw(
+ session, a.str, a.len, &ckpt->raw));
+
+ WT_ERR(__wt_config_subgets(session, &v, "order", &a));
+ if (a.val == 0)
+ goto format;
+ ckpt->order = a.val;
+
+ WT_ERR(__wt_config_subgets(session, &v, "time", &a));
+ if (a.len == 0)
+ goto format;
+ if (a.len > sizeof(timebuf) - 1)
+ goto format;
+ memcpy(timebuf, a.str, a.len);
+ timebuf[a.len] = '\0';
+ if (sscanf(timebuf, "%" SCNuMAX, &ckpt->sec) != 1)
+ goto format;
+
+ WT_ERR(__wt_config_subgets(session, &v, "size", &a));
+ ckpt->ckpt_size = (uint64_t)a.val;
+ }
+
+ /*
+ * Allocate an extra slot for a new value, plus a slot to mark the end.
+ *
+ * This isn't very clean, but there's necessary cooperation between the
+ * schema layer (that maintains the list of checkpoints), the btree
+ * layer (that knows when the root page is written, creating a new
+ * checkpoint), and the block manager (which actually creates the
+ * checkpoint). All of that cooperation is handled in the WT_CKPT
+ * structure referenced from the WT_BTREE structure.
+ */
+ if ((slot + 2) * sizeof(WT_CKPT) >= allocated)
+ WT_ERR(__wt_realloc(session, &allocated,
+ (slot + 2) * sizeof(WT_CKPT), &ckptbase));
+
+ /* Sort in creation-order. */
+ qsort(ckptbase, slot, sizeof(WT_CKPT), __ckpt_compare_order);
+
+ /* Return the array to our caller. */
+ *ckptbasep = ckptbase;
+
+ if (0) {
+format: WT_ERR_MSG(session, WT_ERROR, "corrupted checkpoint list");
+err: __wt_meta_ckptlist_free(session, ckptbase);
+ }
+ __wt_free(session, config);
+ __wt_scr_free(&buf);
+
+ return (ret);
+}
+
+/*
+ * __wt_meta_ckptlist_set --
+ * Set a file's checkpoint value from the WT_CKPT list.
+ */
+int
+__wt_meta_ckptlist_set(
+ WT_SESSION_IMPL *session, const char *name, WT_CKPT *ckptbase)
+{
+ WT_CKPT *ckpt;
+ WT_DECL_RET;
+ WT_ITEM *buf;
+ int64_t order;
+ const char *sep;
+
+ buf = NULL;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ order = 0;
+ sep = "";
+ WT_ERR(__wt_buf_fmt(session, buf, "checkpoint=("));
+ WT_CKPT_FOREACH(ckptbase, ckpt) {
+ /* Skip deleted checkpoints. */
+ if (F_ISSET(ckpt, WT_CKPT_DELETE))
+ continue;
+
+ /*
+ * Track the largest active checkpoint counter: it's not really
+ * a generational number or an ID because we reset it to 1 if
+ * the checkpoint we're writing is the only checkpoint the file
+ * has. The problem we're solving is when two checkpoints are
+ * taken quickly, the timer may not be unique and/or we can even
+ * see time travel on the second checkpoint if we read the time
+ * in-between nanoseconds rolling over. All we need to know
+ * is the real checkpoint order so we don't accidentally take
+ * the wrong "last" checkpoint.
+ */
+ if (ckpt->order > order)
+ order = ckpt->order;
+
+ if (F_ISSET(ckpt, WT_CKPT_ADD | WT_CKPT_UPDATE)) {
+ /* Convert the raw cookie to a hex string. */
+ WT_ERR(__wt_raw_to_hex(session,
+ ckpt->raw.data, ckpt->raw.size, &ckpt->addr));
+
+ if (F_ISSET(ckpt, WT_CKPT_ADD))
+ ckpt->order = order + 1;
+ }
+ WT_ERR(__wt_buf_catfmt(session, buf,
+ "%s%s=(addr=\"%.*s\",order=%" PRIu64
+ ",time=%" PRIuMAX ",size=%" PRIu64 ")",
+ sep, ckpt->name,
+ (int)ckpt->addr.size, (char *)ckpt->addr.data,
+ ckpt->order, ckpt->sec, ckpt->ckpt_size));
+ sep = ",";
+ }
+ WT_ERR(__wt_buf_catfmt(session, buf, ")"));
+ WT_ERR(__ckpt_set(session, name, buf->mem));
+
+err: __wt_scr_free(&buf);
+
+ return (ret);
+}
+
+/*
+ * __wt_meta_ckptlist_free --
+ * Discard the checkpoint array.
+ */
+void
+__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase)
+{
+ WT_CKPT *ckpt;
+
+ if (ckptbase == NULL)
+ return;
+
+ WT_CKPT_FOREACH(ckptbase, ckpt) {
+ __wt_free(session, ckpt->name);
+ __wt_buf_free(session, &ckpt->addr);
+ __wt_buf_free(session, &ckpt->raw);
+ __wt_free(session, ckpt->bpriv);
+ }
+ __wt_free(session, ckptbase);
+}
+
+/*
+ * __ckpt_version_chk --
+ * Check the version major/minor numbers.
+ */
+static int
+__ckpt_version_chk(
+ WT_SESSION_IMPL *session, const char *name, const char *config)
+{
+ WT_CONFIG_ITEM a, v;
+ int majorv, minorv;
+
+ WT_RET(__wt_config_getones(session, config, "version", &v));
+ WT_RET(__wt_config_subgets(session, &v, "major", &a));
+ majorv = (int)a.val;
+ WT_RET(__wt_config_subgets(session, &v, "minor", &a));
+ minorv = (int)a.val;
+
+ if (majorv > WT_BTREE_MAJOR_VERSION ||
+ (majorv == WT_BTREE_MAJOR_VERSION &&
+ minorv > WT_BTREE_MINOR_VERSION))
+ WT_RET_MSG(session, EACCES,
+ "%s is an unsupported version of a WiredTiger file",
+ name);
+ return (0);
+}
diff --git a/src/meta/meta_snapshot.c b/src/meta/meta_snapshot.c
deleted file mode 100644
index 60642e7f53c..00000000000
--- a/src/meta/meta_snapshot.c
+++ /dev/null
@@ -1,410 +0,0 @@
-/*-
- * Copyright (c) 2008-2012 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-static int __snap_get(
- WT_SESSION_IMPL *, const char *, const char *, WT_ITEM *);
-static int __snap_get_last(WT_SESSION_IMPL *, const char *, WT_ITEM *);
-static int __snap_get_name(
- WT_SESSION_IMPL *, const char *, const char *, WT_ITEM *);
-static int __snap_set(WT_SESSION_IMPL *, const char *, const char *);
-static int __snap_version_chk(WT_SESSION_IMPL *, const char *, const char *);
-
-/*
- * __wt_meta_snapshot_get --
- * Get the file's most recent snapshot address.
- */
-int
-__wt_meta_snapshot_get(WT_SESSION_IMPL *session,
- const char *name, const char *snapshot, WT_ITEM *addr)
-{
- WT_DECL_RET;
-
- /* Get the snapshot address. */
- ret = __snap_get(session, name, snapshot, addr);
-
- /*
- * If we find a snapshot, check the version and return the address.
- * If we don't find a named snapshot, we're done, they're read-only.
- * If we don't find a default snapshot, it's creation, return "no
- * data" and let our caller handle it.
- */
- if (ret == WT_NOTFOUND) {
- /*
- * If the caller didn't give us a specific snapshot name, we
- * assume it's a creation and there isn't a snapshot to find.
- * Let the caller deal with the failure.
- */
- if (snapshot != NULL)
- WT_RET_MSG(session, WT_NOTFOUND,
- "no \"%s\" snapshot found in %s",
- snapshot, name);
-
- addr->data = NULL;
- addr->size = 0;
- }
- return (0);
-}
-
-/*
- * __wt_meta_snapshot_clear --
- * Clear a file's snapshot.
- */
-int
-__wt_meta_snapshot_clear(WT_SESSION_IMPL *session, const char *name)
-{
- WT_DECL_RET;
-
- ret = __snap_set(session, name, NULL);
-
- /*
- * If we are unrolling a failed create, we may have already removed the
- * metadata entry. If no entry is found to update and we're trying to
- * clear the snapshot, just ignore it.
- */
- if (ret == WT_NOTFOUND)
- ret = 0;
- return (ret);
-}
-
-/*
- * __snap_get --
- * Get a file's snapshot.
- */
-static int
-__snap_get(WT_SESSION_IMPL *session,
- const char *name, const char *snapshot, WT_ITEM *addr)
-{
- WT_DECL_RET;
- const char *config;
-
- config = NULL;
-
- /* Retrieve the metadata entry for the file. */
- WT_ERR(__wt_metadata_read(session, name, &config));
-
- /* Check the major/minor version numbers. */
- WT_ERR(__snap_version_chk(session, name, config));
-
- /* Retrieve the named snapshot or the last snapshot. */
- if (snapshot == NULL)
- WT_ERR(__snap_get_last(session, config, addr));
- else
- WT_ERR(__snap_get_name(session, snapshot, config, addr));
-
-err: __wt_free(session, config);
- return (ret);
-}
-
-/*
- * __snap_set --
- * Set a file's snapshot.
- */
-static int
-__snap_set(WT_SESSION_IMPL *session, const char *name, const char *v)
-{
- WT_DECL_RET;
- const char *config, *cfg[3], *newcfg;
-
- config = newcfg = NULL;
-
- /* Retrieve the metadata for this file. */
- WT_ERR(__wt_metadata_read(session, name, &config));
-
- /* Replace the snapshot entry. */
- cfg[0] = config;
- cfg[1] = v == NULL ? "snapshot=()" : v;
- cfg[2] = NULL;
- WT_ERR(__wt_config_collapse(session, cfg, &newcfg));
- WT_ERR(__wt_metadata_update(session, name, newcfg));
-
-err: __wt_free(session, config);
- __wt_free(session, newcfg);
- return (ret);
-}
-
-/*
- * __snap_get_name --
- * Return the cookie associated with a file's named snapshot.
- */
-static int
-__snap_get_name(WT_SESSION_IMPL *session,
- const char *name, const char *config, WT_ITEM *addr)
-{
- WT_CONFIG snapconf;
- WT_CONFIG_ITEM a, k, v;
-
- WT_RET(__wt_config_getones(session, config, "snapshot", &v));
- WT_RET(__wt_config_subinit(session, &snapconf, &v));
- while (__wt_config_next(&snapconf, &k, &v) == 0)
- if (strlen(name) == k.len && strncmp(name, k.str, k.len) == 0) {
- WT_RET(__wt_config_subgets(session, &v, "addr", &a));
- WT_RET(__wt_nhex_to_raw(session, a.str, a.len, addr));
- return (0);
- }
- return (WT_NOTFOUND);
-}
-
-/*
- * __snap_get_last --
- * Return the cookie associated with the file's last snapshot.
- */
-static int
-__snap_get_last(
- WT_SESSION_IMPL *session, const char *config, WT_ITEM *addr)
-{
- WT_CONFIG snapconf;
- WT_CONFIG_ITEM a, k, v;
- int64_t found;
-
- WT_RET(__wt_config_getones(session, config, "snapshot", &v));
- WT_RET(__wt_config_subinit(session, &snapconf, &v));
- for (found = 0; __wt_config_next(&snapconf, &k, &v) == 0;) {
- if (found) {
- WT_RET(__wt_config_subgets(session, &v, "order", &a));
- if (a.val < found)
- continue;
- }
-
- WT_RET(__wt_config_subgets(session, &v, "addr", &a));
- if (a.len == 0)
- WT_RET(EINVAL);
-
- /* Our caller wants the raw cookie, not the hex. */
- WT_RET(__wt_nhex_to_raw(session, a.str, a.len, addr));
- WT_RET(__wt_config_subgets(session, &v, "order", &a));
- found = a.val;
- }
-
- return (found ? 0 : WT_NOTFOUND);
-}
-
-/*
- * __snap_compare_order --
- * Qsort comparison routine for the snapshot list.
- */
-static int
-__snap_compare_order(const void *a, const void *b)
-{
- WT_SNAPSHOT *asnap, *bsnap;
-
- asnap = (WT_SNAPSHOT *)a;
- bsnap = (WT_SNAPSHOT *)b;
-
- return (asnap->order > bsnap->order ? 1 : -1);
-}
-
-/*
- * __wt_meta_snaplist_get --
- * Load all available snapshot information for a file.
- */
-int
-__wt_meta_snaplist_get(
- WT_SESSION_IMPL *session, const char *name, WT_SNAPSHOT **snapbasep)
-{
- WT_CONFIG snapconf;
- WT_CONFIG_ITEM a, k, v;
- WT_DECL_RET;
- WT_ITEM *buf;
- WT_SNAPSHOT *snap, *snapbase;
- size_t allocated, slot;
- const char *config;
- char timebuf[64];
-
- *snapbasep = NULL;
-
- buf = NULL;
- snapbase = NULL;
- allocated = slot = 0;
- config = NULL;
-
- /* Retrieve the metadata information for the file. */
- WT_RET(__wt_metadata_read(session, name, &config));
-
- /* Load any existing snapshots into the array. */
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- if (__wt_config_getones(session, config, "snapshot", &v) == 0 &&
- __wt_config_subinit(session, &snapconf, &v) == 0)
- for (; __wt_config_next(&snapconf, &k, &v) == 0; ++slot) {
- if (slot * sizeof(WT_SNAPSHOT) == allocated)
- WT_ERR(__wt_realloc(session, &allocated,
- (slot + 50) * sizeof(WT_SNAPSHOT),
- &snapbase));
- snap = &snapbase[slot];
-
- /*
- * Copy the name, address (raw and hex), order and time
- * into the slot.
- */
- WT_ERR(
- __wt_strndup(session, k.str, k.len, &snap->name));
-
- WT_ERR(__wt_config_subgets(session, &v, "addr", &a));
- if (a.len == 0)
- goto format;
- WT_ERR(__wt_buf_set(
- session, &snap->addr, a.str, a.len));
- WT_ERR(__wt_nhex_to_raw(
- session, a.str, a.len, &snap->raw));
-
- WT_ERR(__wt_config_subgets(session, &v, "order", &a));
- if (a.val == 0)
- goto format;
- snap->order = a.val;
-
- WT_ERR(__wt_config_subgets(session, &v, "time", &a));
- if (a.len == 0)
- goto format;
- if (a.len > sizeof(timebuf) - 1)
- goto format;
- memcpy(timebuf, a.str, a.len);
- timebuf[a.len] = '\0';
- if (sscanf(timebuf, "%" SCNuMAX, &snap->sec) != 1)
- goto format;
-
- WT_ERR(__wt_config_subgets(session, &v, "size", &a));
- snap->snapshot_size = (uint64_t)a.val;
- }
-
- /*
- * Allocate an extra slot for a new value, plus a slot to mark the end.
- *
- * This isn't very clean, but there's necessary cooperation between the
- * schema layer (that maintains the list of snapshots), the btree layer
- * (that knows when the root page is written, creating a new snapshot),
- * and the block manager (which actually creates the snapshot). All of
- * that cooperation is handled in the WT_SNAPSHOT structure referenced
- * from the WT_BTREE structure.
- */
- if ((slot + 2) * sizeof(WT_SNAPSHOT) >= allocated)
- WT_ERR(__wt_realloc(session, &allocated,
- (slot + 2) * sizeof(WT_SNAPSHOT), &snapbase));
-
- /* Sort in creation-order. */
- qsort(snapbase, slot, sizeof(WT_SNAPSHOT), __snap_compare_order);
-
- /* Return the array to our caller. */
- *snapbasep = snapbase;
-
- if (0) {
-format: WT_ERR_MSG(session, WT_ERROR, "corrupted snapshot list");
-err: __wt_meta_snaplist_free(session, snapbase);
- }
- __wt_free(session, config);
- __wt_scr_free(&buf);
-
- return (ret);
-}
-
-/*
- * __wt_meta_snaplist_set --
- * Set a file's snapshot value from the WT_SNAPSHOT list.
- */
-int
-__wt_meta_snaplist_set(
- WT_SESSION_IMPL *session, const char *name, WT_SNAPSHOT *snapbase)
-{
- WT_DECL_RET;
- WT_ITEM *buf;
- WT_SNAPSHOT *snap;
- int64_t order;
- const char *sep;
-
- buf = NULL;
-
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- order = 0;
- sep = "";
- WT_ERR(__wt_buf_fmt(session, buf, "snapshot=("));
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
- /* Skip deleted snapshots. */
- if (F_ISSET(snap, WT_SNAP_DELETE))
- continue;
-
- /*
- * Track the largest active snapshot counter: it's not really
- * a generational number or an ID because we reset it to 1 if
- * the snapshot we're writing is the only snapshot the file has.
- * The problem we're solving is when two snapshots are taken
- * quickly, the timer may not be unique and/or we can even see
- * time travel on the second snapshot if we read the time
- * in-between nanoseconds rolling over. All we need to know
- * is the real snapshot order so we don't accidentally take the
- * wrong "last" snapshot.
- */
- if (snap->order > order)
- order = snap->order;
-
- if (F_ISSET(snap, WT_SNAP_ADD | WT_SNAP_UPDATE)) {
- /* Convert the raw cookie to a hex string. */
- WT_ERR(__wt_raw_to_hex(session,
- snap->raw.data, snap->raw.size, &snap->addr));
-
- if (F_ISSET(snap, WT_SNAP_ADD))
- snap->order = order + 1;
- }
- WT_ERR(__wt_buf_catfmt(session, buf,
- "%s%s=(addr=\"%.*s\",order=%" PRIu64
- ",time=%" PRIuMAX ",size=%" PRIu64 ")",
- sep, snap->name,
- (int)snap->addr.size, (char *)snap->addr.data,
- snap->order, snap->sec, snap->snapshot_size));
- sep = ",";
- }
- WT_ERR(__wt_buf_catfmt(session, buf, ")"));
- WT_ERR(__snap_set(session, name, buf->mem));
-
-err: __wt_scr_free(&buf);
-
- return (ret);
-}
-
-/*
- * __wt_meta_snaplist_free --
- * Discard the snapshot array.
- */
-void
-__wt_meta_snaplist_free(WT_SESSION_IMPL *session, WT_SNAPSHOT *snapbase)
-{
- WT_SNAPSHOT *snap;
- if (snapbase == NULL)
- return;
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
- __wt_free(session, snap->name);
- __wt_buf_free(session, &snap->addr);
- __wt_buf_free(session, &snap->raw);
- __wt_free(session, snap->bpriv);
- }
- __wt_free(session, snapbase);
-}
-
-/*
- * __snap_version_chk --
- * Check the version major/minor numbers.
- */
-static int
-__snap_version_chk(
- WT_SESSION_IMPL *session, const char *name, const char *config)
-{
- WT_CONFIG_ITEM a, v;
- int majorv, minorv;
-
- WT_RET(__wt_config_getones(session, config, "version", &v));
- WT_RET(__wt_config_subgets(session, &v, "major", &a));
- majorv = (int)a.val;
- WT_RET(__wt_config_subgets(session, &v, "minor", &a));
- minorv = (int)a.val;
-
- if (majorv > WT_BTREE_MAJOR_VERSION ||
- (majorv == WT_BTREE_MAJOR_VERSION &&
- minorv > WT_BTREE_MINOR_VERSION))
- WT_RET_MSG(session, EACCES,
- "%s is an unsupported version of a WiredTiger file",
- name);
- return (0);
-}
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index 9f17789a1cc..03cc8150e1b 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -106,9 +106,9 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
saved_btree = session->btree;
session->btree = trk->btree;
if (!unroll)
- WT_TRET(__wt_bm_snapshot_resolve(session, NULL));
- /* Release the snapshot lock */
- __wt_rwunlock(session, session->btree->snaplock);
+ WT_TRET(__wt_bm_checkpoint_resolve(session));
+ /* Release the checkpoint lock */
+ __wt_rwunlock(session, session->btree->ckptlock);
session->btree = saved_btree;
break;
case WT_ST_LOCK: /* Handle lock, see above */
diff --git a/src/schema/schema_truncate.c b/src/schema/schema_truncate.c
index bd8a64736b4..6bb83642be7 100644
--- a/src/schema/schema_truncate.c
+++ b/src/schema/schema_truncate.c
@@ -24,7 +24,7 @@ __truncate_file(WT_SESSION_IMPL *session, const char *name)
WT_RET(__wt_conn_btree_close_all(session, name));
/* Delete the root address and truncate the file. */
- WT_RET(__wt_meta_snapshot_clear(session, name));
+ WT_RET(__wt_meta_checkpoint_clear(session, name));
WT_RET(__wt_btree_truncate(session, filename));
return (0);
diff --git a/src/session/session_btree.c b/src/session/session_btree.c
index abcc3efa16e..83d74201905 100644
--- a/src/session/session_btree.c
+++ b/src/session/session_btree.c
@@ -159,10 +159,10 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
btree = btree_session->btree;
if (strcmp(uri, btree->name) != 0)
continue;
- if ((ckpt == NULL && btree->ckpt == NULL) ||
- (ckpt != NULL && btree->ckpt != NULL &&
- (strncmp(ckpt, btree->ckpt, ckptlen) == 0 &&
- btree->ckpt[ckptlen] == '\0')))
+ if ((ckpt == NULL && btree->checkpoint == NULL) ||
+ (ckpt != NULL && btree->checkpoint != NULL &&
+ (strncmp(ckpt, btree->checkpoint, ckptlen) == 0 &&
+ btree->checkpoint[ckptlen] == '\0')))
break;
}
@@ -201,12 +201,12 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
}
/*
- * __wt_session_lock_snapshot --
- * Lock the btree handle for the given snapshot name.
+ * __wt_session_lock_checkpoint --
+ * Lock the btree handle for the given checkpoint name.
*/
int
-__wt_session_lock_snapshot(
- WT_SESSION_IMPL *session, const char *snapshot, uint32_t flags)
+__wt_session_lock_checkpoint(
+ WT_SESSION_IMPL *session, const char *checkpoint, uint32_t flags)
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -217,7 +217,7 @@ __wt_session_lock_snapshot(
btree = session->btree;
WT_ERR(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf, "checkpoint=\"%s\"", snapshot));
+ WT_ERR(__wt_buf_fmt(session, buf, "checkpoint=\"%s\"", checkpoint));
cfg[0] = buf->data;
LF_SET(WT_BTREE_LOCK_ONLY);
diff --git a/src/session/session_salvage.c b/src/session/session_salvage.c
index 87fb2d36854..4f45115423b 100644
--- a/src/session/session_salvage.c
+++ b/src/session/session_salvage.c
@@ -15,43 +15,43 @@ int
__wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_BTREE *btree;
+ WT_CKPT *ckptbase;
WT_DECL_RET;
- WT_SNAPSHOT *snapbase;
btree = session->btree;
/*
* XXX
- * The salvage process reads and discards previous snapshot blocks, so
- * the underlying block manager has to ignore any previous snapshot
- * entries when creating a new snapshot, in other words, we can't use
- * the metadata snapshot list, it has all of those snapshots listed and
- * we don't care about them. Build a clean snapshot array and use it
- * instead.
+ * The salvage process reads and discards previous checkpoints, so the
+ * underlying block manager has to ignore any previous checkpoint
+ * entries when creating a new checkpoint, in other words, we can't use
+ * the metadata checkpoint list, it has all of those checkpoints listed
+ * and we don't care about them. Build a clean checkpoint list and use
+ * it instead.
*
- * Don't first clear the metadata snapshot list and call the snapshot
- * get routine: a crash between clearing the metadata snapshot list and
- * creating a new snapshot list would look like a create or open of a
- * file without a snapshot from which to roll-forward, and the contents
- * of the file would be discarded.
+ * Don't first clear the metadata checkpoint list and call the function
+ * to get a list of checkpoints: a crash between clearing the metadata
+ * checkpoint list and creating a new checkpoint list would look like a
+ * create or open of a file without a checkpoint to roll-forward from,
+ * and the contents of the file would be discarded.
*/
- WT_RET(__wt_calloc_def(session, 2, &snapbase));
- WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snapbase[0].name));
- F_SET(&snapbase[0], WT_SNAP_ADD);
+ WT_RET(__wt_calloc_def(session, 2, &ckptbase));
+ WT_ERR(__wt_strdup(session, WT_INTERNAL_CHKPT, &ckptbase[0].name));
+ F_SET(&ckptbase[0], WT_CKPT_ADD);
- WT_ERR(__wt_bt_salvage(session, snapbase, cfg));
+ WT_ERR(__wt_bt_salvage(session, ckptbase, cfg));
/*
- * If no snapshot was created, well, it's probably bad news, but there
- * is nothing to do but clear any recorded snapshots for the file. If
- * a snapshot was created, life is good, replace any recorded snapshots
- * with the new one.
+ * If no checkpoint was created, well, it's probably bad news, but there
+ * is nothing to do but clear any recorded checkpoints for the file. If
+ * a checkpoint was created, life is good, replace any existing list of
+ * checkpoints with the single new one.
*/
- if (snapbase[0].raw.data == NULL)
- WT_ERR(__wt_meta_snapshot_clear(session, btree->name));
+ if (ckptbase[0].raw.data == NULL)
+ WT_ERR(__wt_meta_checkpoint_clear(session, btree->name));
else
- WT_ERR(__wt_meta_snaplist_set(session, btree->name, snapbase));
+ WT_ERR(__wt_meta_ckptlist_set(session, btree->name, ckptbase));
-err: __wt_meta_snaplist_free(session, snapbase);
+err: __wt_meta_ckptlist_free(session, ckptbase);
return (ret);
}
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 09f9d8fe42c..b71a4e6cf84 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -129,11 +129,11 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
}
/*
- * __txn_release --
+ * __wt_txn_release --
* Release the resources associated with the current transaction.
*/
-static int
-__txn_release(WT_SESSION_IMPL *session)
+int
+__wt_txn_release(WT_SESSION_IMPL *session)
{
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
@@ -162,7 +162,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_UNUSED(cfg);
- return (__txn_release(session));
+ return (__wt_txn_release(session));
}
/*
@@ -182,107 +182,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
for (i = 0, m = txn->mod; i < txn->mod_count; i++, m++)
**m = WT_TXN_ABORTED;
- return (__txn_release(session));
-}
-
-/*
- * __wt_txn_checkpoint --
- * Checkpoint a database or a list of objects in the database.
- */
-int
-__wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
-{
- WT_CONFIG targetconf;
- WT_CONFIG_ITEM cval, k, v;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
- int target_list, tracking;
- const char *txn_cfg[] = { "isolation=snapshot", NULL };
-
- target_list = tracking = 0;
- txn_global = &S2C(session)->txn_global;
-
- /* Only one checkpoint can be active at a time. */
- __wt_writelock(session, S2C(session)->ckpt_rwlock);
- WT_ERR(__wt_txn_begin(session, txn_cfg));
-
- /* Prevent eviction from evicting anything newer than this. */
- txn_global->ckpt_txnid = session->txn.snap_min;
-
- WT_ERR(__wt_meta_track_on(session));
- tracking = 1;
-
- /* Step through the list of targets and snapshot each one. */
- cval.len = 0;
- WT_ERR(__wt_config_gets(session, cfg, "target", &cval));
- if (cval.len != 0) {
- WT_ERR(__wt_scr_alloc(session, 512, &tmp));
- WT_ERR(__wt_config_subinit(session, &targetconf, &cval));
- while ((ret = __wt_config_next(&targetconf, &k, &v)) == 0) {
- target_list = 1;
- WT_ERR(__wt_buf_fmt(session, tmp, "%.*s",
- (int)k.len, k.str));
-
- if (v.len != 0)
- WT_ERR_MSG(session, EINVAL,
- "invalid checkpoint target \"%s\": "
- "URIs may require quoting",
- (const char *)tmp->data);
-
- __wt_spin_lock(session, &S2C(session)->schema_lock);
- ret = __wt_schema_worker(
- session, tmp->data, __wt_snapshot, cfg, 0);
- __wt_spin_unlock(session, &S2C(session)->schema_lock);
-
- if (ret != 0)
- WT_ERR_MSG(session, ret, "%s",
- (const char *)tmp->data);
- }
- if (ret == WT_NOTFOUND)
- ret = 0;
- }
-
- if (!target_list) {
- /*
- * Possible checkpoint snapshot name. If snapshots are named,
- * we must snapshot both open and closed files; if snapshots
- * are not named, we only snapshot open files.
- *
- * XXX
- * We don't optimize unnamed checkpoints of a list of targets,
- * we open the targets and snapshot them even if they are
- * quiescent and don't need a snapshot, believing applications
- * unlikely to checkpoint a list of closed targets.
- */
- cval.len = 0;
- WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
- WT_ERR(cval.len == 0 ?
- __wt_conn_btree_apply(session, __wt_snapshot, cfg) :
- __wt_meta_btree_apply(session, __wt_snapshot, cfg, 0));
- }
-
-err: /*
- * XXX Rolling back the changes here is problematic.
- *
- * If we unroll here, we need a way to roll back changes to the avail
- * list for each tree that was successfully synced before the error
- * occurred. Otherwise, the next time we try this operation, we will
- * try to free an old snapshot again.
- *
- * OTOH, if we commit the changes after a failure, we have partially
- * overwritten the checkpoint, so what ends up on disk is not
- * consistent.
- */
- if (tracking)
- WT_TRET(__wt_meta_track_off(session, ret != 0));
-
- txn_global->ckpt_txnid = WT_TXN_NONE;
- if (F_ISSET(&session->txn, TXN_RUNNING))
- WT_TRET(__txn_release(session));
- __wt_rwunlock(session, S2C(session)->ckpt_rwlock);
- __wt_scr_free(&tmp);
- return (ret);
+ return (__wt_txn_release(session));
}
/*
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
new file mode 100644
index 00000000000..d60a1938609
--- /dev/null
+++ b/src/txn/txn_ckpt.c
@@ -0,0 +1,333 @@
+/*-
+ * Copyright (c) 2008-2012 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_txn_checkpoint --
+ * Checkpoint a database or a list of objects in the database.
+ */
+int
+__wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CONFIG targetconf;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
+ int target_list, tracking;
+ const char *txn_cfg[] = { "isolation=snapshot", NULL };
+
+ target_list = tracking = 0;
+ txn_global = &S2C(session)->txn_global;
+
+ /* Only one checkpoint can be active at a time. */
+ __wt_writelock(session, S2C(session)->ckpt_rwlock);
+ WT_ERR(__wt_txn_begin(session, txn_cfg));
+
+ /* Prevent eviction from evicting anything newer than this. */
+ txn_global->ckpt_txnid = session->txn.snap_min;
+
+ WT_ERR(__wt_meta_track_on(session));
+ tracking = 1;
+
+ /* Step through the list of targets and checkpoint each one. */
+ cval.len = 0;
+ WT_ERR(__wt_config_gets(session, cfg, "target", &cval));
+ if (cval.len != 0) {
+ WT_ERR(__wt_scr_alloc(session, 512, &tmp));
+ WT_ERR(__wt_config_subinit(session, &targetconf, &cval));
+ while ((ret = __wt_config_next(&targetconf, &k, &v)) == 0) {
+ target_list = 1;
+ WT_ERR(__wt_buf_fmt(session, tmp, "%.*s",
+ (int)k.len, k.str));
+
+ if (v.len != 0)
+ WT_ERR_MSG(session, EINVAL,
+ "invalid checkpoint target \"%s\": "
+ "URIs may require quoting",
+ (const char *)tmp->data);
+
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
+ ret = __wt_schema_worker(
+ session, tmp->data, __wt_checkpoint, cfg, 0);
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
+
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s",
+ (const char *)tmp->data);
+ }
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ }
+
+ if (!target_list) {
+ /*
+ * Possible checkpoint name. If checkpoints are named, we must
+ * checkpoint both open and closed files; if checkpoints are not
+ * named, we only checkpoint open files.
+ *
+ * XXX
+ * We don't optimize unnamed checkpoints of a list of targets,
+ * we open the targets and checkpoint them even if they are
+ * quiescent and don't need a checkpoint, believing applications
+ * unlikely to checkpoint a list of closed targets.
+ */
+ cval.len = 0;
+ WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
+ WT_ERR(cval.len == 0 ?
+ __wt_conn_btree_apply(session, __wt_checkpoint, cfg) :
+ __wt_meta_btree_apply(session, __wt_checkpoint, cfg, 0));
+ }
+
+err: /*
+ * XXX Rolling back the changes here is problematic.
+ *
+ * If we unroll here, we need a way to roll back changes to the avail
+ * list for each tree that was successfully synced before the error
+ * occurred. Otherwise, the next time we try this operation, we will
+ * try to free an old checkpoint again.
+ *
+ * OTOH, if we commit the changes after a failure, we have partially
+ * overwritten the checkpoint, so what ends up on disk is not
+ * consistent.
+ */
+ if (tracking)
+ WT_TRET(__wt_meta_track_off(session, ret != 0));
+
+ txn_global->ckpt_txnid = WT_TXN_NONE;
+ if (F_ISSET(&session->txn, TXN_RUNNING))
+ WT_TRET(__wt_txn_release(session));
+ __wt_rwunlock(session, S2C(session)->ckpt_rwlock);
+ __wt_scr_free(&tmp);
+ return (ret);
+}
+
+/*
+ * __drop --
+ * Drop all checkpoints with a specific name.
+ */
+static void
+__drop(WT_CKPT *ckptbase, const char *name, size_t len)
+{
+ WT_CKPT *ckpt;
+
+ WT_CKPT_FOREACH(ckptbase, ckpt)
+ if (strlen(ckpt->name) == len &&
+ strncmp(ckpt->name, name, len) == 0)
+ F_SET(ckpt, WT_CKPT_DELETE);
+}
+
+/*
+ * __drop_from --
+ * Drop all checkpoints after, and including, the named checkpoint.
+ */
+static void
+__drop_from(WT_CKPT *ckptbase, const char *name, size_t len)
+{
+ WT_CKPT *ckpt;
+ int matched;
+
+ /*
+ * There's a special case -- if the name is "all", then we delete all
+ * of the checkpoints.
+ */
+ if (len == strlen("all") && strncmp(name, "all", len) == 0) {
+ WT_CKPT_FOREACH(ckptbase, ckpt)
+ F_SET(ckpt, WT_CKPT_DELETE);
+ return;
+ }
+
+ /*
+ * We use the first checkpoint we can find, that is, if there are two
+ * checkpoints with the same name in the list, we'll delete from the
+ * first match to the end.
+ */
+ matched = 0;
+ WT_CKPT_FOREACH(ckptbase, ckpt) {
+ if (!matched &&
+ (strlen(ckpt->name) != len ||
+ strncmp(ckpt->name, name, len) != 0))
+ continue;
+
+ matched = 1;
+ F_SET(ckpt, WT_CKPT_DELETE);
+ }
+}
+
+/*
+ * __drop_to --
+ *	Flag for deletion the named checkpoint and every checkpoint that
+ * precedes it in the list.
+ */
+static void
+__drop_to(WT_CKPT *ckptbase, const char *name, size_t len)
+{
+	WT_CKPT *entry, *last;
+
+	/*
+	 * The last entry carrying the name is the end point: when the list
+	 * holds several checkpoints with the same name, the walk below runs
+	 * from the start of the list through the latest one.
+	 */
+	last = NULL;
+	WT_CKPT_FOREACH(ckptbase, entry) {
+		if (strlen(entry->name) != len)
+			continue;
+		if (strncmp(entry->name, name, len) != 0)
+			continue;
+
+		last = entry;
+	}
+
+	/* No checkpoint has this name, nothing is flagged. */
+	if (last == NULL)
+		return;
+
+	WT_CKPT_FOREACH(ckptbase, entry) {
+		F_SET(entry, WT_CKPT_DELETE);
+
+		if (entry == last)
+			break;
+	}
+}
+
+/*
+ * __wt_checkpoint --
+ * Checkpoint a tree.
+ *
+ * Operates on session->btree. The cfg argument may be NULL; when it is
+ * not, its "name" and "drop" settings are read. A NULL cfg also selects
+ * WT_SYNC_DISCARD instead of WT_SYNC for the cache flush.
+ *
+ * Returns 0 on success and a non-zero error code otherwise. Failures
+ * reported through WT_ERR / WT_ERR_MSG presumably jump to the err label,
+ * like the explicit goto below -- confirm against the macro definitions.
+ *
+ * The tree's checkpoint lock is taken on entry. It is released before
+ * returning unless the checkpoint was handed to the metadata tracking code
+ * (tracked is set), in which case it is left held here; presumably the
+ * tracking code releases it when the operation is resolved -- confirm.
+ */
+int
+__wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_BTREE *btree;
+ WT_CKPT *ckpt, *ckptbase, *deleted;
+ WT_CONFIG dropconf;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_RET;
+ const char *name;
+ char *name_alloc;
+ int force, tracked;
+
+ btree = session->btree;
+ force = tracked = 0;
+ ckpt = ckptbase = NULL;
+ name_alloc = NULL;
+
+ /*
+ * Checkpoints are single-threaded: the tree's checkpoint lock is held
+ * for writing from here until the err label, or beyond this function
+ * if the checkpoint ends up tracked.
+ */
+ __wt_writelock(session, btree->ckptlock);
+
+ /*
+ * Get the list of checkpoints for this file. If there's no reference,
+ * this file is dead. Discard it from the cache without bothering to
+ * write any dirty pages.
+ *
+ * Any failure here, WT_NOTFOUND included, skips the rest of the
+ * function; for WT_NOTFOUND the return value becomes the result of
+ * the discarding cache flush.
+ */
+ if ((ret =
+ __wt_meta_ckptlist_get(session, btree->name, &ckptbase)) != 0) {
+ if (ret == WT_NOTFOUND)
+ ret = __wt_bt_cache_flush(
+ session, NULL, WT_SYNC_DISCARD_NOWRITE, 0);
+ goto err;
+ }
+
+ /*
+ * This may be a named checkpoint, check the configuration. If it's a
+ * named checkpoint, set force, we have to create the checkpoint even if
+ * the tree is clean.
+ *
+ * A configured name is copied into name_alloc (freed at the end of the
+ * function) so that name can be passed to strlen and __wt_strdup
+ * below. Without a configured name, WT_INTERNAL_CHKPT is used.
+ */
+ cval.len = 0;
+ if (cfg != NULL)
+ WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
+ if (cval.len == 0)
+ name = WT_INTERNAL_CHKPT;
+ else {
+ force = 1;
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
+ name = name_alloc;
+ }
+
+ /*
+ * We may be dropping checkpoints, check the configuration. If we're
+ * dropping checkpoints, set force, we have to create the checkpoint
+ * even if the tree is clean.
+ *
+ * Each entry of the "drop" list is handled according to its form: an
+ * entry without a value names checkpoints to drop by exact name, a
+ * "from" key drops the checkpoint named by its value and everything
+ * after it, and a "to" key drops the checkpoint named by its value and
+ * everything before it. Any other key that carries a value fails with
+ * EINVAL. The helpers only flag entries of ckptbase with
+ * WT_CKPT_DELETE. The loop stops at the first non-zero return from
+ * __wt_config_next; WT_ERR_NOTFOUND_OK presumably accepts WT_NOTFOUND
+ * as the normal end of the list -- confirm against the macro.
+ */
+ if (cfg != NULL) {
+ cval.len = 0;
+ WT_ERR(__wt_config_gets(session, cfg, "drop", &cval));
+ if (cval.len != 0) {
+ WT_ERR(__wt_config_subinit(session, &dropconf, &cval));
+ while ((ret =
+ __wt_config_next(&dropconf, &k, &v)) == 0) {
+ force = 1;
+
+ if (v.len == 0)
+ __drop(ckptbase, k.str, k.len);
+ else if (k.len == strlen("from") &&
+ strncmp(k.str, "from", k.len) == 0)
+ __drop_from(ckptbase, v.str, v.len);
+ else if (k.len == strlen("to") &&
+ strncmp(k.str, "to", k.len) == 0)
+ __drop_to(ckptbase, v.str, v.len);
+ else
+ WT_ERR_MSG(session, EINVAL,
+ "unexpected value for checkpoint "
+ "key: %.*s",
+ (int)k.len, k.str);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+ }
+
+ /* Discard checkpoints with the same name as the new checkpoint. */
+ __drop(ckptbase, name, strlen(name));
+
+ /*
+ * Add a new checkpoint entry at the end of the list. The empty loop
+ * leaves ckpt at the position where WT_CKPT_FOREACH stops; this
+ * assumes the list returned by __wt_meta_ckptlist_get has a usable
+ * spare slot there -- confirm against that function.
+ */
+ WT_CKPT_FOREACH(ckptbase, ckpt)
+ ;
+ WT_ERR(__wt_strdup(session, name, &ckpt->name));
+ F_SET(ckpt, WT_CKPT_ADD);
+
+ /*
+ * Lock the checkpoints that will be deleted.
+ *
+ * Checkpoints are only locked when tracking is enabled, which covers
+ * sync and drop operations, but not close. The reasoning is that
+ * there should be no access to a checkpoint during close, because any
+ * thread accessing a checkpoint will also have the current file handle
+ * open.
+ */
+ if (WT_META_TRACKING(session))
+ WT_CKPT_FOREACH(ckptbase, deleted)
+ if (F_ISSET(deleted, WT_CKPT_DELETE))
+ WT_ERR(__wt_session_lock_checkpoint(session,
+ deleted->name, WT_BTREE_EXCLUSIVE));
+
+ /*
+ * Flush the file from the cache, creating the checkpoint. A NULL cfg
+ * requests WT_SYNC_DISCARD rather than WT_SYNC.
+ */
+ WT_ERR(__wt_bt_cache_flush(
+ session, ckptbase, cfg == NULL ? WT_SYNC_DISCARD : WT_SYNC, force));
+
+ /*
+ * If there was a checkpoint, update the metadata and resolve it.
+ *
+ * The new entry's raw.data is the indicator: if it is still NULL after
+ * the flush, no checkpoint was created, which is only treated as an
+ * error when a checkpoint was forced.
+ */
+ if (ckpt->raw.data == NULL) {
+ if (force)
+ WT_ERR_MSG(session, EINVAL,
+ "cache flush failed to create a checkpoint");
+ } else {
+ WT_ERR(__wt_meta_ckptlist_set(session, btree->name, ckptbase));
+ /*
+ * If tracking is enabled, defer making pages available until
+ * the end of the transaction. The exception is if the handle
+ * is being discarded: in that case, it will be gone by the
+ * time we try to apply or unroll the meta tracking event.
+ */
+ if (WT_META_TRACKING(session) && cfg != NULL) {
+ WT_ERR(__wt_meta_track_checkpoint(session));
+ tracked = 1;
+ } else
+ WT_ERR(__wt_bm_checkpoint_resolve(session));
+ }
+
+err: __wt_meta_ckptlist_free(session, ckptbase);
+ if (!tracked)
+ __wt_rwunlock(session, btree->ckptlock);
+
+ __wt_free(session, name_alloc);
+
+ return (ret);
+}
diff --git a/src/txn/txn_snapshot.c b/src/txn/txn_snapshot.c
deleted file mode 100644
index 85ac650c21d..00000000000
--- a/src/txn/txn_snapshot.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*-
- * Copyright (c) 2008-2012 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __drop --
- * Drop all snapshots with a specific name.
- */
-static void
-__drop(WT_SNAPSHOT *snapbase, const char *name, size_t len)
-{
- WT_SNAPSHOT *snap;
-
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- if (strlen(snap->name) == len &&
- strncmp(snap->name, name, len) == 0)
- F_SET(snap, WT_SNAP_DELETE);
-}
-
-/*
- * __drop_from --
- * Drop all snapshots after, and including, the named snapshot.
- */
-static void
-__drop_from(WT_SNAPSHOT *snapbase, const char *name, size_t len)
-{
- WT_SNAPSHOT *snap;
- int matched;
-
- /*
- * There's a special case -- if the name is "all", then we delete all
- * of the snapshots.
- */
- if (len == strlen("all") && strncmp(name, "all", len) == 0) {
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- F_SET(snap, WT_SNAP_DELETE);
- return;
- }
-
- /*
- * We use the first snapshot we can find, that is, if there are two
- * snapshots with the same name in the list, we'll delete from the
- * first match to the end.
- */
- matched = 0;
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
- if (!matched &&
- (strlen(snap->name) != len ||
- strncmp(snap->name, name, len) != 0))
- continue;
-
- matched = 1;
- F_SET(snap, WT_SNAP_DELETE);
- }
-}
-
-/*
- * __drop_to --
- * Drop all snapshots before, and including, the named snapshot.
- */
-static void
-__drop_to(WT_SNAPSHOT *snapbase, const char *name, size_t len)
-{
- WT_SNAPSHOT *mark, *snap;
-
- /*
- * We use the last snapshot we can find, that is, if there are two
- * snapshots with the same name in the list, we'll delete from the
- * beginning to the second match, not the first.
- */
- mark = NULL;
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- if (strlen(snap->name) == len &&
- strncmp(snap->name, name, len) == 0)
- mark = snap;
-
- if (mark == NULL)
- return;
-
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
- F_SET(snap, WT_SNAP_DELETE);
-
- if (snap == mark)
- break;
- }
-}
-
-/*
- * __wt_snapshot --
- * Snapshot a tree.
- */
-int
-__wt_snapshot(WT_SESSION_IMPL *session, const char *cfg[])
-{
- WT_BTREE *btree;
- WT_CONFIG dropconf;
- WT_CONFIG_ITEM cval, k, v;
- WT_DECL_RET;
- WT_SNAPSHOT *deleted, *snap, *snapbase;
- const char *name;
- char *name_alloc;
- int force, tracked;
-
- btree = session->btree;
- force = tracked = 0;
- snap = snapbase = NULL;
- name_alloc = NULL;
-
- /* Snapshots are single-threaded. */
- __wt_writelock(session, btree->snaplock);
-
- /*
- * Get the list of snapshots for this file. If there's no reference,
- * this file is dead. Discard it from the cache without bothering to
- * write any dirty pages.
- */
- if ((ret =
- __wt_meta_snaplist_get(session, btree->name, &snapbase)) != 0) {
- if (ret == WT_NOTFOUND)
- ret = __wt_bt_cache_flush(
- session, NULL, WT_SYNC_DISCARD_NOWRITE, 0);
- goto err;
- }
-
- /*
- * This may be a named snapshot, check the configuration. If it's a
- * named snapshot, set force, we have to create the snapshot even if
- * the tree is clean.
- */
- cval.len = 0;
- if (cfg != NULL)
- WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
- if (cval.len == 0)
- name = WT_INTERNAL_SNAPSHOT;
- else {
- force = 1;
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
- name = name_alloc;
- }
-
- /*
- * We may be dropping snapshots, check the configuration. If we're
- * dropping snapshots, set force, we have to create the snapshot even
- * if the tree is clean.
- */
- if (cfg != NULL) {
- cval.len = 0;
- WT_ERR(__wt_config_gets(session, cfg, "drop", &cval));
- if (cval.len != 0) {
- WT_ERR(__wt_config_subinit(session, &dropconf, &cval));
- while ((ret =
- __wt_config_next(&dropconf, &k, &v)) == 0) {
- force = 1;
-
- if (v.len == 0)
- __drop(snapbase, k.str, k.len);
- else if (k.len == strlen("from") &&
- strncmp(k.str, "from", k.len) == 0)
- __drop_from(snapbase, v.str, v.len);
- else if (k.len == strlen("to") &&
- strncmp(k.str, "to", k.len) == 0)
- __drop_to(snapbase, v.str, v.len);
- else
- WT_ERR_MSG(session, EINVAL,
- "unexpected value for snapshot "
- "key: %.*s",
- (int)k.len, k.str);
- }
- WT_ERR_NOTFOUND_OK(ret);
- }
- }
-
- /* Discard snapshots named the same as the snapshot being created. */
- __drop(snapbase, name, strlen(name));
-
- /* Add a new snapshot entry at the end of the list. */
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- ;
- WT_ERR(__wt_strdup(session, name, &snap->name));
- F_SET(snap, WT_SNAP_ADD);
-
- /*
- * Lock the snapshots that will be deleted.
- *
- * Snapshots are only locked when tracking is enabled, which covers
- * sync and drop operations, but not close. The reasoning is that
- * there should be no access to a snapshot during close, because any
- * thread accessing a snapshot will also have the current file handle
- * open.
- */
- if (WT_META_TRACKING(session))
- WT_SNAPSHOT_FOREACH(snapbase, deleted)
- if (F_ISSET(deleted, WT_SNAP_DELETE))
- WT_ERR(__wt_session_lock_snapshot(session,
- deleted->name, WT_BTREE_EXCLUSIVE));
-
- /* Flush the file from the cache, creating the snapshot. */
- WT_ERR(__wt_bt_cache_flush(
- session, snapbase, cfg == NULL ? WT_SYNC_DISCARD : WT_SYNC, force));
-
- /* If there was a snapshot, update the metadata and resolve it. */
- if (snap->raw.data == NULL) {
- if (force)
- WT_ERR_MSG(session,
- EINVAL, "cache flush failed to create a snapshot");
- } else {
- WT_ERR(__wt_meta_snaplist_set(session, btree->name, snapbase));
- /*
- * If tracking is enabled, defer making pages available until
- * the end of the transaction. The exception is if the handle
- * is being discarded: in that case, it will be gone by the
- * time we try to apply or unroll the meta tracking event.
- */
- if (WT_META_TRACKING(session) && cfg != NULL) {
- WT_ERR(__wt_meta_track_checkpoint(session));
- tracked = 1;
- } else
- WT_ERR(__wt_bm_snapshot_resolve(session, snapbase));
- }
-
-err: __wt_meta_snaplist_free(session, snapbase);
- if (!tracked)
- __wt_rwunlock(session, btree->snaplock);
-
- __wt_free(session, name_alloc);
-
- return (ret);
-}
diff --git a/src/utilities/util_list.c b/src/utilities/util_list.c
index 19db1677638..53b3a0d87d1 100644
--- a/src/utilities/util_list.c
+++ b/src/utilities/util_list.c
@@ -154,7 +154,7 @@ static int
list_print_checkpoint(WT_SESSION *session, const char *key)
{
WT_DECL_RET;
- WT_SNAPSHOT *snap, *snapbase;
+ WT_CKPT *ckpt, *ckptbase;
size_t len;
time_t t;
uint64_t v;
@@ -163,24 +163,23 @@ list_print_checkpoint(WT_SESSION *session, const char *key)
/*
* We may not find any checkpoints for this file, in which case we don't
* report an error, and continue our caller's loop. Otherwise, read the
- * list of snapshots (which is the same as the list of checkpoints), and
- * print each snapshot's name and time.
+ * list of checkpoints and print each checkpoint's name and time.
*/
- if ((ret = __wt_metadata_get_snaplist(session, key, &snapbase)) != 0)
+ if ((ret = __wt_metadata_get_ckptlist(session, key, &ckptbase)) != 0)
return (ret == WT_NOTFOUND ? 0 : ret);
/* Find the longest name, so we can pretty-print. */
len = 0;
- WT_SNAPSHOT_FOREACH(snapbase, snap)
- if (strlen(snap->name) > len)
- len = strlen(snap->name);
+ WT_CKPT_FOREACH(ckptbase, ckpt)
+ if (strlen(ckpt->name) > len)
+ len = strlen(ckpt->name);
++len;
- WT_SNAPSHOT_FOREACH(snapbase, snap) {
- t = (time_t)snap->sec;
- printf("\t%*s: %.24s", (int)len, snap->name, ctime_r(&t, buf));
+ WT_CKPT_FOREACH(ckptbase, ckpt) {
+ t = (time_t)ckpt->sec;
+ printf("\t%*s: %.24s", (int)len, ckpt->name, ctime_r(&t, buf));
- v = snap->snapshot_size;
+ v = ckpt->ckpt_size;
if (v >= WT_PETABYTE)
printf(" (%" PRIu64 " PB)\n", v / WT_PETABYTE);
else if (v >= WT_TERABYTE)
@@ -195,7 +194,7 @@ list_print_checkpoint(WT_SESSION *session, const char *key)
printf(" (%" PRIu64 " B)\n", v);
}
- __wt_metadata_free_snaplist(session, snapbase);
+ __wt_metadata_free_ckptlist(session, ckptbase);
return (0);
}