summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/btree/bt_handle.c11
-rw-r--r--src/btree/bt_slvg.c36
-rw-r--r--src/btree/bt_sync.c4
-rw-r--r--src/btree/rec_write.c22
-rw-r--r--src/include/btree.i25
-rw-r--r--src/include/serial.i15
-rw-r--r--src/meta/meta_ckpt.c6
-rw-r--r--src/txn/txn_ckpt.c114
-rw-r--r--test/suite/test_checkpoint01.py42
9 files changed, 175 insertions, 100 deletions
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 5459a7f005c..598eba0c1ba 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -340,14 +340,13 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session)
* simply has no records, for whatever reason, and trust reconciliation
* to figure out it's empty and not write any blocks.
* We do not set the tree's modified flag because the checkpoint code
- * skips unmodified files in default checkpoints (checkpoints that don't
- * require a write unless the file is actually dirty). There's no
- * reason to reconcile this file unless the application requires it as
- * part of a forced checkpoint or if the application actually modifies
- * it.
+ * skips unmodified files in closing checkpoints (checkpoints that don't
+ * require a write unless the file is actually dirty). There's no need
+ * to reconcile this file unless the application does a real checkpoint
+ * or it's actually modified.
*/
WT_ERR(__wt_page_modify_init(session, leaf));
- ++leaf->modify->write_gen;
+ __wt_page_modify_set(leaf);
return (0);
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index 6735895f16a..b3b56a51f2d 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -1030,6 +1030,30 @@ __slvg_col_range_missing(WT_SESSION_IMPL *session, WT_STUFF *ss)
}
/*
+ * __slvg_modify_init --
+ * Initialize a salvage page's modification information.
+ */
+static int
+__slvg_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page)
+{
+ WT_BTREE *btree;
+
+ btree = session->btree;
+
+ WT_RET(__wt_page_modify_init(session, page));
+
+ /*
+ * The page and tree are both dirty -- setting the tree's modification
+ * flag is probably not necessary (it won't be part of any checkpoints,
+ * at least in this incarnation), but it's the right thing to do.
+ */
+ __wt_tree_modify_set(btree);
+ __wt_page_modify_set(page);
+
+ return (0);
+}
+
+/*
* __slvg_col_build_internal --
* Build a column-store in-memory page that references all of the leaf
* pages we've found.
@@ -1056,8 +1080,7 @@ __slvg_col_build_internal(
page->u.intl.recno = 1;
page->entries = leaf_cnt;
page->type = WT_PAGE_COL_INT;
- WT_ERR(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
+ WT_ERR(__slvg_modify_init(session, page));
for (ref = page->u.intl.t, i = 0; i < ss->pages_next; ++i) {
if ((trk = ss->pages[i]) == NULL)
@@ -1174,8 +1197,7 @@ __slvg_col_build_leaf(
ref->addr = NULL;
/* Write the new version of the leaf page to disk. */
- WT_ERR(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
+ WT_ERR(__slvg_modify_init(session, page));
WT_ERR(__wt_rec_write(session, page, cookie));
/* Reset the page. */
@@ -1630,8 +1652,7 @@ __slvg_row_build_internal(
page->read_gen = 0;
page->entries = leaf_cnt;
page->type = WT_PAGE_ROW_INT;
- WT_ERR(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
+ WT_ERR(__slvg_modify_init(session, page));
for (ref = page->u.intl.t, i = 0; i < ss->pages_next; ++i) {
if ((trk = ss->pages[i]) == NULL)
@@ -1827,8 +1848,7 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session,
ref->addr = NULL;
/* Write the new version of the leaf page to disk. */
- WT_ERR(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
+ WT_ERR(__slvg_modify_init(session, page));
WT_ERR(__wt_rec_write(session, page, cookie));
/* Reset the page. */
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 327e6729030..321d05608d2 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -20,9 +20,9 @@ __wt_bt_cache_force_write(WT_SESSION_IMPL *session)
btree = session->btree;
page = btree->root_page;
- /* If forcing a checkpoint, dirty the root page to ensure a write. */
+ /* Dirty the root page to ensure a write. */
WT_RET(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
+ __wt_page_modify_set(page);
return (0);
}
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 30281e93b22..8c4926386ca 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -296,7 +296,7 @@ __wt_rec_write(
break;
}
WT_RET(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
+ __wt_page_modify_set(page);
return (0);
}
@@ -344,7 +344,7 @@ __wt_rec_write(
WT_VERBOSE_RET(session, reconcile,
"root page split %p -> %p", page, page->modify->u.split);
page = page->modify->u.split;
- __wt_page_modify_set(session, page);
+ __wt_page_modify_set(page);
F_CLR(page->modify, WT_PM_REC_SPLIT_MERGE);
WT_RET(__wt_rec_write(session, page, NULL));
@@ -1167,7 +1167,7 @@ __wt_rec_bulk_wrapup(WT_CURSOR_BULK *cbulk)
/* Mark the page's parent dirty. */
WT_RET(__wt_page_modify_init(session, page->parent));
- __wt_page_modify_set(session, page->parent);
+ __wt_page_modify_set(page->parent);
return (0);
}
@@ -3062,15 +3062,17 @@ err: __wt_scr_free(&tkey);
}
/*
- * Success: if modifications were skipped, the tree cannot be clean. As
- * the checkpoint initiation code might have cleared the tree's modified
- * flag, explicitly dirty the page, which includes setting the tree's
- * modified flag. If modifications were not skipped, the page might be
- * clean, update the disk generation to the write generation as of when
- * reconciliation started.
+ * Success.
+ * If modifications were skipped, the tree isn't clean. The checkpoint
+ * call cleared the tree's modified value before it called the eviction
+ * thread, so we must explicitly reset the tree's modified flag.
+ *
+ * If modifications were not skipped, the page might be clean; update
+ * the disk generation to the write generation as of when reconciliation
+ * started.
*/
if (r->upd_skipped)
- __wt_page_modify_set(session, page);
+ __wt_tree_modify_set(btree);
else
mod->disk_gen = r->orig_write_gen;
diff --git a/src/include/btree.i b/src/include/btree.i
index 89ca2616f85..bd5d24033ce 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -136,15 +136,32 @@ __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
+ * __wt_tree_modify_clr, set, test --
+ * Mark the tree dirty, clean and test.
+ */
+static inline void
+__wt_tree_modify_clr(WT_BTREE *btree)
+{
+ btree->modified = 0;
+}
+static inline void
+__wt_tree_modify_set(WT_BTREE *btree)
+{
+ btree->modified = 1;
+}
+static inline int
+__wt_tree_modify_isset(WT_BTREE *btree)
+{
+ return (btree->modified);
+}
+
+/*
* __wt_page_modify_set --
* Mark the page dirty.
*/
static inline void
-__wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
+__wt_page_modify_set(WT_PAGE *page)
{
- /* Set the tree modified flag. */
- session->btree->modified = 1;
-
/*
* Publish: there must be a barrier to ensure all changes to the page
* are flushed before we update the page's write generation, otherwise
diff --git a/src/include/serial.i b/src/include/serial.i
index d08a778e72f..7448dadc9bc 100644
--- a/src/include/serial.i
+++ b/src/include/serial.i
@@ -62,9 +62,18 @@ __wt_session_serialize_func(WT_SESSION_IMPL *session,
static inline void
__wt_session_serialize_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page, int ret)
{
- /* If passed a page and the return value is OK, we modified the page. */
- if (page != NULL && ret == 0)
- __wt_page_modify_set(session, page);
+ WT_BTREE *btree;
+
+ btree = session->btree;
+
+ /*
+ * If passed a page and the return value is OK, we modified the tree
+ * and the page.
+ */
+ if (page != NULL && ret == 0) {
+ __wt_tree_modify_set(btree);
+ __wt_page_modify_set(page);
+ }
/*
* Publish: there must be a barrier to ensure the return value is set
diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c
index dce1d4e6d62..5f870be64d8 100644
--- a/src/meta/meta_ckpt.c
+++ b/src/meta/meta_ckpt.c
@@ -393,8 +393,12 @@ __wt_meta_ckptlist_set(
WT_ERR(__wt_raw_to_hex(session,
ckpt->raw.data, ckpt->raw.size, &ckpt->addr));
- if (F_ISSET(ckpt, WT_CKPT_ADD))
+ if (F_ISSET(ckpt, WT_CKPT_ADD)) {
ckpt->order = ++maxorder;
+
+ /* Assert we have a checkpoint. */
+ WT_ASSERT(session, ckpt->raw.data != NULL);
+ }
}
if (strcmp(ckpt->name, WT_CHECKPOINT) == 0)
WT_ERR(__wt_buf_catfmt(session, buf,
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 3304011256f..52485a360a5 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -277,18 +277,17 @@ int
__wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_BTREE *btree;
- WT_CKPT *ckpt, *ckptbase, *new;
+ WT_CKPT *ckpt, *ckptbase;
WT_CONFIG dropconf;
WT_CONFIG_ITEM cval, k, v;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
const char *name;
char *name_alloc;
- int deleted, force, is_checkpoint;
+ int deleted, is_checkpoint;
conn = S2C(session);
btree = session->btree;
- force = 0;
ckpt = ckptbase = NULL;
name_alloc = NULL;
@@ -314,7 +313,7 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* have to checkpoint it, we'll test again once we understand the
* nature of the checkpoint.
*/
- if (btree->modified == 0 && !is_checkpoint)
+ if (!__wt_tree_modify_isset(btree) && !is_checkpoint)
return (__wt_bt_cache_flush(
session, NULL, WT_SYNC_DISCARD_NOWRITE));
@@ -337,21 +336,11 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
name = WT_CHECKPOINT;
else {
WT_ERR(__ckpt_name_ok(session, cval.str, cval.len));
-
- /*
- * Dropping and naming checkpoints require a checkpoint even
- * if the tree is clean.
- */
- force = 1;
WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
name = name_alloc;
}
- /*
- * We may be dropping specific checkpoints, check the configuration.
- * If we're dropping checkpoints, set force, we have to create the
- * checkpoint even if the tree is clean.
- */
+ /* We may be dropping specific checkpoints, check the configuration. */
if (cfg != NULL) {
cval.len = 0;
WT_ERR(__wt_config_gets(session, cfg, "drop", &cval));
@@ -378,36 +367,42 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
"unexpected value for checkpoint "
"key: %.*s",
(int)k.len, k.str);
-
- /* Dropping requires a checkpoint. */
- force = 1;
}
WT_ERR_NOTFOUND_OK(ret);
}
}
- /*
- * Ignore read-only objects if we don't have to take a checkpoint. If
- * force is set because the checkpoint is named or there's an explicit
- * command to drop a checkpoint, we'll modify the file's root page and
- * ensure a checkpoint happens, but otherwise, the object isn't dirty
- * and the existing checkpoints are sufficient.
- */
- if (btree->modified == 0 && !force)
- goto skip;
-
/* Drop checkpoints with the same name as the one we're taking. */
__drop(ckptbase, name, strlen(name));
/*
- * Another check for read-only objects not requiring a checkpoint. If
- * the application repeatedly checkpoints the same name (imagine taking
- * an hourly checkpoint using the same name), there's no reason to
- * repeat the checkpoint for read-only objects. If the only checkpoint
- * we're deleting is the last one in the list, and it has the same name
- * as the checkpoint we're about to take, skip the work.
+ * Check for clean objects not requiring a checkpoint.
+ *
+ * If we're closing a handle, and the object is clean, we can skip the
+ * checkpoint, whatever checkpoints we have are sufficient. (We might
+ * not have any checkpoints if the object was never modified, and that's
+ * OK: the object creation code doesn't mark the tree modified so we can
+ * skip newly created trees here.)
+ *
+ * If the application repeatedly checkpoints an object (imagine hourly
+ * checkpoints using the same explicit or internal name), there's no
+ * reason to repeat the checkpoint for clean objects. The test is if
+ * the only checkpoint we're deleting is the last one in the list and
+ * it has the same name as the checkpoint we're about to take, skip the
+ * work. (We can skip checkpoints that delete more than the last
+ * checkpoint because deleting those checkpoints might free up space in
+ * the file.) This means an application toggling between two (or more)
+ * checkpoint names will repeatedly take empty checkpoints, but that's
+ * not likely enough to make detection worthwhile.
+ *
+ * Checkpoint read-only objects otherwise: the application must be able
+ * to open the checkpoint in a cursor after taking any checkpoint, which
+ * means it must exist.
*/
- if (btree->modified == 0) {
+ if (!__wt_tree_modify_isset(btree)) {
+ if (!is_checkpoint)
+ goto skip;
+
deleted = 0;
WT_CKPT_FOREACH(ckptbase, ckpt)
if (F_ISSET(ckpt, WT_CKPT_DELETE))
@@ -423,7 +418,6 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
;
WT_ERR(__wt_strdup(session, name, &ckpt->name));
F_SET(ckpt, WT_CKPT_ADD);
- new = ckpt;
/*
* Lock the checkpoints that will be deleted.
@@ -477,15 +471,14 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
}
/*
- * If we're forcing a checkpoint, notify the cache in case the file
- * has no dirty pages.
+ * If the object is unmodified, notify the cache in case the file has
+ * no dirty pages.
*/
- if (force)
+ if (!__wt_tree_modify_isset(btree))
WT_ERR(__wt_bt_cache_force_write(session));
/*
- * Clear the tree's modified value and ensure that write happens before
- * we reconcile/write any pages. Any changes before we clear the flag
+ * Clear the tree's modified flag; any changes before we clear the flag
* are guaranteed to be part of this checkpoint (unless reconciliation
* skips updates for transactional reasons), and changes subsequent to
* the checkpoint start, which might not be included, will re-set the
@@ -493,36 +486,25 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* handled in the reconciliation code: if reconciliation skips updates,
* it sets the modified flag itself.
*/
- WT_PUBLISH(btree->modified, 0);
+ __wt_tree_modify_clr(btree);
/* Flush the file from the cache, creating the checkpoint. */
WT_ERR(__wt_bt_cache_flush(session,
ckptbase, is_checkpoint ? WT_SYNC : WT_SYNC_DISCARD));
- /* If there was a checkpoint, update the metadata and resolve it. */
- if (new->raw.data == NULL) {
- /*
- * Our knowledge of whether or not the file has dirty pages
- * isn't perfect, we only know if the file was ever modified.
- * If we didn't really need a checkpoint, it's not a problem
- * if one wasn't created.
- */
- if (force)
- WT_ERR_MSG(session, EINVAL,
- "cache flush failed to create a checkpoint");
- } else {
- WT_ERR(__wt_meta_ckptlist_set(session, btree->name, ckptbase));
- /*
- * If tracking is enabled, defer making pages available until
- * the end of the transaction. The exception is if the handle
- * is being discarded: in that case, it will be gone by the
- * time we try to apply or unroll the meta tracking event.
- */
- if (WT_META_TRACKING(session) && is_checkpoint)
- WT_ERR(__wt_meta_track_checkpoint(session));
- else
- WT_ERR(__wt_bm_checkpoint_resolve(session));
- }
+ /* Update the object's metadata. */
+ WT_ERR(__wt_meta_ckptlist_set(session, btree->name, ckptbase));
+
+ /*
+ * If tracking enabled, defer making pages available until transaction
+ * end. The exception is if the handle is being discarded, in which
+ * case the handle will be gone by the time we try to apply or unroll
+ * the meta tracking event.
+ */
+ if (WT_META_TRACKING(session) && is_checkpoint)
+ WT_ERR(__wt_meta_track_checkpoint(session));
+ else
+ WT_ERR(__wt_bm_checkpoint_resolve(session));
err:
skip: __wt_meta_ckptlist_free(session, ckptbase);
diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py
index 4596ae8e971..a279ce937d9 100644
--- a/test/suite/test_checkpoint01.py
+++ b/test/suite/test_checkpoint01.py
@@ -328,6 +328,48 @@ class test_checkpoint_last_name(wttest.WiredTigerTestCase):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.checkpoint(conf), msg)
+class test_checkpoint_empty(wttest.WiredTigerTestCase):
+ scenarios = [
+ ('file', dict(uri='file:checkpoint')),
+ ('table', dict(uri='table:checkpoint')),
+ ]
+
+ # Create an empty file, do one of 4 cases of checkpoint, then verify the
+ # checkpoints exist. The reason for the 4 cases is we must create all
+ # checkpoints an application can explicitly reference using a cursor, and
+ # I want to test when other types of checkpoints are created before the
+ # checkpoint we really need.
+ # Case 1: a named checkpoint
+ def test_checkpoint_empty_one(self):
+ self.session.create(self.uri, "key_format=S,value_format=S")
+ self.session.checkpoint('name=ckpt')
+ cursor = self.session.open_cursor(self.uri, None, "checkpoint=ckpt")
+
+ # Case 2: an internal checkpoint
+ def test_checkpoint_empty_two(self):
+ self.session.create(self.uri, "key_format=S,value_format=S")
+ self.session.checkpoint()
+ cursor = self.session.open_cursor(
+ self.uri, None, "checkpoint=WiredTigerCheckpoint")
+
+ # Case 3: a named checkpoint, then an internal checkpoint
+ def test_checkpoint_empty_three(self):
+ self.session.create(self.uri, "key_format=S,value_format=S")
+ self.session.checkpoint('name=ckpt')
+ self.session.checkpoint()
+ cursor = self.session.open_cursor(self.uri, None, "checkpoint=ckpt")
+ cursor = self.session.open_cursor(
+ self.uri, None, "checkpoint=WiredTigerCheckpoint")
+
+ # Case 4: an internal checkpoint, then a named checkpoint
+ def test_checkpoint_empty_four(self):
+ self.session.create(self.uri, "key_format=S,value_format=S")
+ self.session.checkpoint()
+ self.session.checkpoint('name=ckpt')
+ cursor = self.session.open_cursor(self.uri, None, "checkpoint=ckpt")
+ cursor = self.session.open_cursor(
+ self.uri, None, "checkpoint=WiredTigerCheckpoint")
+
if __name__ == '__main__':
wttest.run()