diff options
-rw-r--r-- | src/btree/bt_handle.c | 11 | ||||
-rw-r--r-- | src/btree/bt_slvg.c | 36 | ||||
-rw-r--r-- | src/btree/bt_sync.c | 4 | ||||
-rw-r--r-- | src/btree/rec_write.c | 22 | ||||
-rw-r--r-- | src/include/btree.i | 25 | ||||
-rw-r--r-- | src/include/serial.i | 15 | ||||
-rw-r--r-- | src/meta/meta_ckpt.c | 6 | ||||
-rw-r--r-- | src/txn/txn_ckpt.c | 114 | ||||
-rw-r--r-- | test/suite/test_checkpoint01.py | 42 |
9 files changed, 175 insertions, 100 deletions
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 5459a7f005c..598eba0c1ba 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -340,14 +340,13 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session) * simply has no records, for whatever reason, and trust reconciliation * to figure out it's empty and not write any blocks. * We do not set the tree's modified flag because the checkpoint code - * skips unmodified files in default checkpoints (checkpoints that don't - * require a write unless the file is actually dirty). There's no - * reason to reconcile this file unless the application requires it as - * part of a forced checkpoint or if the application actually modifies - * it. + * skips unmodified files in closing checkpoints (checkpoints that don't + * require a write unless the file is actually dirty). There's no need + * to reconcile this file unless the application does a real checkpoint + * or it's actually modified. */ WT_ERR(__wt_page_modify_init(session, leaf)); - ++leaf->modify->write_gen; + __wt_page_modify_set(leaf); return (0); diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index 6735895f16a..b3b56a51f2d 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -1030,6 +1030,30 @@ __slvg_col_range_missing(WT_SESSION_IMPL *session, WT_STUFF *ss) } /* + * __slvg_modify_init -- + * Initialize a salvage page's modification information. + */ +static int +__slvg_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page) +{ + WT_BTREE *btree; + + btree = session->btree; + + WT_RET(__wt_page_modify_init(session, page)); + + /* + * The page and tree are both dirty -- setting the tree's modification + * flag is probably not necessary (it won't be part of any checkpoints, + * at least in this incarnation), but it's the right thing to do. + */ + __wt_tree_modify_set(btree); + __wt_page_modify_set(page); + + return (0); +} + +/* * __slvg_col_build_internal -- * Build a column-store in-memory page that references all of the leaf * pages we've found. @@ -1056,8 +1080,7 @@ __slvg_col_build_internal( page->u.intl.recno = 1; page->entries = leaf_cnt; page->type = WT_PAGE_COL_INT; - WT_ERR(__wt_page_modify_init(session, page)); - __wt_page_modify_set(session, page); + WT_ERR(__slvg_modify_init(session, page)); for (ref = page->u.intl.t, i = 0; i < ss->pages_next; ++i) { if ((trk = ss->pages[i]) == NULL) @@ -1174,8 +1197,7 @@ __slvg_col_build_leaf( ref->addr = NULL; /* Write the new version of the leaf page to disk. */ - WT_ERR(__wt_page_modify_init(session, page)); - __wt_page_modify_set(session, page); + WT_ERR(__slvg_modify_init(session, page)); WT_ERR(__wt_rec_write(session, page, cookie)); /* Reset the page. */ @@ -1630,8 +1652,7 @@ __slvg_row_build_internal( page->read_gen = 0; page->entries = leaf_cnt; page->type = WT_PAGE_ROW_INT; - WT_ERR(__wt_page_modify_init(session, page)); - __wt_page_modify_set(session, page); + WT_ERR(__slvg_modify_init(session, page)); for (ref = page->u.intl.t, i = 0; i < ss->pages_next; ++i) { if ((trk = ss->pages[i]) == NULL) @@ -1827,8 +1848,7 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session, ref->addr = NULL; /* Write the new version of the leaf page to disk. */ - WT_ERR(__wt_page_modify_init(session, page)); - __wt_page_modify_set(session, page); + WT_ERR(__slvg_modify_init(session, page)); WT_ERR(__wt_rec_write(session, page, cookie)); /* Reset the page. */ diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 327e6729030..321d05608d2 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -20,9 +20,9 @@ __wt_bt_cache_force_write(WT_SESSION_IMPL *session) btree = session->btree; page = btree->root_page; - /* If forcing a checkpoint, dirty the root page to ensure a write. */ + /* Dirty the root page to ensure a write. */ WT_RET(__wt_page_modify_init(session, page)); - __wt_page_modify_set(session, page); + __wt_page_modify_set(page); return (0); } diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index 30281e93b22..8c4926386ca 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -296,7 +296,7 @@ __wt_rec_write( break; } WT_RET(__wt_page_modify_init(session, page)); - __wt_page_modify_set(session, page); + __wt_page_modify_set(page); return (0); } @@ -344,7 +344,7 @@ __wt_rec_write( WT_VERBOSE_RET(session, reconcile, "root page split %p -> %p", page, page->modify->u.split); page = page->modify->u.split; - __wt_page_modify_set(session, page); + __wt_page_modify_set(page); F_CLR(page->modify, WT_PM_REC_SPLIT_MERGE); WT_RET(__wt_rec_write(session, page, NULL)); @@ -1167,7 +1167,7 @@ __wt_rec_bulk_wrapup(WT_CURSOR_BULK *cbulk) /* Mark the page's parent dirty. */ WT_RET(__wt_page_modify_init(session, page->parent)); - __wt_page_modify_set(session, page->parent); + __wt_page_modify_set(page->parent); return (0); } @@ -3062,15 +3062,17 @@ err: __wt_scr_free(&tkey); } /* - * Success: if modifications were skipped, the tree cannot be clean. As - * the checkpoint initiation code might have cleared the tree's modified - * flag, explicitly dirty the page, which includes setting the tree's - * modified flag. If modifications were not skipped, the page might be - * clean, update the disk generation to the write generation as of when - * reconciliation started. + * Success. + * If modifications were skipped, the tree isn't clean. The checkpoint + * call cleared the tree's modified value before it called the eviction + * thread, so we must explicitly reset the tree's modified flag. + * + * If modifications were not skipped, the page might be clean; update + * the disk generation to the write generation as of when reconciliation + * started. */ if (r->upd_skipped) - __wt_page_modify_set(session, page); + __wt_tree_modify_set(btree); else mod->disk_gen = r->orig_write_gen; diff --git a/src/include/btree.i b/src/include/btree.i index 89ca2616f85..bd5d24033ce 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -136,15 +136,32 @@ __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page) } /* + * __wt_tree_modify_clr, set, test -- + * Mark the tree dirty, clean and test. + */ +static inline void +__wt_tree_modify_clr(WT_BTREE *btree) +{ + btree->modified = 0; +} +static inline void +__wt_tree_modify_set(WT_BTREE *btree) +{ + btree->modified = 1; +} +static inline int +__wt_tree_modify_isset(WT_BTREE *btree) +{ + return (btree->modified); +} + +/* * __wt_page_modify_set -- * Mark the page dirty. */ static inline void -__wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) +__wt_page_modify_set(WT_PAGE *page) { - /* Set the tree modified flag. */ - session->btree->modified = 1; - /* * Publish: there must be a barrier to ensure all changes to the page * are flushed before we update the page's write generation, otherwise diff --git a/src/include/serial.i b/src/include/serial.i index d08a778e72f..7448dadc9bc 100644 --- a/src/include/serial.i +++ b/src/include/serial.i @@ -62,9 +62,18 @@ __wt_session_serialize_func(WT_SESSION_IMPL *session, static inline void __wt_session_serialize_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page, int ret) { - /* If passed a page and the return value is OK, we modified the page. */ - if (page != NULL && ret == 0) - __wt_page_modify_set(session, page); + WT_BTREE *btree; + + btree = session->btree; + + /* + * If passed a page and the return value is OK, we modified the tree + * and the page. + */ + if (page != NULL && ret == 0) { + __wt_tree_modify_set(btree); + __wt_page_modify_set(page); + } /* * Publish: there must be a barrier to ensure the return value is set diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c index dce1d4e6d62..5f870be64d8 100644 --- a/src/meta/meta_ckpt.c +++ b/src/meta/meta_ckpt.c @@ -393,8 +393,12 @@ __wt_meta_ckptlist_set( WT_ERR(__wt_raw_to_hex(session, ckpt->raw.data, ckpt->raw.size, &ckpt->addr)); - if (F_ISSET(ckpt, WT_CKPT_ADD)) + if (F_ISSET(ckpt, WT_CKPT_ADD)) { ckpt->order = ++maxorder; + + /* Assert we have a checkpoint. */ + WT_ASSERT(session, ckpt->raw.data != NULL); + } } if (strcmp(ckpt->name, WT_CHECKPOINT) == 0) WT_ERR(__wt_buf_catfmt(session, buf, diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 3304011256f..52485a360a5 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -277,18 +277,17 @@ int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { WT_BTREE *btree; - WT_CKPT *ckpt, *ckptbase, *new; + WT_CKPT *ckpt, *ckptbase; WT_CONFIG dropconf; WT_CONFIG_ITEM cval, k, v; WT_CONNECTION_IMPL *conn; WT_DECL_RET; const char *name; char *name_alloc; - int deleted, force, is_checkpoint; + int deleted, is_checkpoint; conn = S2C(session); btree = session->btree; - force = 0; ckpt = ckptbase = NULL; name_alloc = NULL; @@ -314,7 +313,7 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * have to checkpoint it, we'll test again once we understand the * nature of the checkpoint. */ - if (btree->modified == 0 && !is_checkpoint) + if (!__wt_tree_modify_isset(btree) && !is_checkpoint) return (__wt_bt_cache_flush( session, NULL, WT_SYNC_DISCARD_NOWRITE)); @@ -337,21 +336,11 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) name = WT_CHECKPOINT; else { WT_ERR(__ckpt_name_ok(session, cval.str, cval.len)); - - /* - * Dropping and naming checkpoints require a checkpoint even - * if the tree is clean. - */ - force = 1; WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc)); name = name_alloc; } - /* - * We may be dropping specific checkpoints, check the configuration. - * If we're dropping checkpoints, set force, we have to create the - * checkpoint even if the tree is clean. - */ + /* We may be dropping specific checkpoints, check the configuration. */ if (cfg != NULL) { cval.len = 0; WT_ERR(__wt_config_gets(session, cfg, "drop", &cval)); @@ -378,36 +367,42 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) "unexpected value for checkpoint " "key: %.*s", (int)k.len, k.str); - - /* Dropping requires a checkpoint. */ - force = 1; } WT_ERR_NOTFOUND_OK(ret); } } - /* - * Ignore read-only objects if we don't have to take a checkpoint. If - * force is set because the checkpoint is named or there's an explicit - * command to drop a checkpoint, we'll modify the file's root page and - * ensure a checkpoint happens, but otherwise, the object isn't dirty - * and the existing checkpoints are sufficient. - */ - if (btree->modified == 0 && !force) - goto skip; - /* Drop checkpoints with the same name as the one we're taking. */ __drop(ckptbase, name, strlen(name)); /* - * Another check for read-only objects not requiring a checkpoint. If - * the application repeatedly checkpoints the same name (imagine taking - * an hourly checkpoint using the same name), there's no reason to - * repeat the checkpoint for read-only objects. If the only checkpoint - * we're deleting is the last one in the list, and it has the same name - * as the checkpoint we're about to take, skip the work. + * Check for clean objects not requiring a checkpoint. + * + * If we're closing a handle, and the object is clean, we can skip the + * checkpoint, whatever checkpoints we have are sufficient. (We might + * not have any checkpoints if the object was never modified, and that's + * OK: the object creation code doesn't mark the tree modified so we can + * skip newly created trees here.) + * + * If the application repeatedly checkpoints an object (imagine hourly + * checkpoints using the same explicit or internal name), there's no + * reason to repeat the checkpoint for clean objects. The test is if + * the only checkpoint we're deleting is the last one in the list and + * it has the same name as the checkpoint we're about to take, skip the + * work. (We can skip checkpoints that delete more than the last + * checkpoint because deleting those checkpoints might free up space in + * the file.) This means an application toggling between two (or more) + * checkpoint names will repeatedly take empty checkpoints, but that's + * not likely enough to make detection worthwhile. + * + * Checkpoint read-only objects otherwise: the application must be able + * to open the checkpoint in a cursor after taking any checkpoint, which + * means it must exist. */ - if (btree->modified == 0) { + if (!__wt_tree_modify_isset(btree)) { + if (!is_checkpoint) + goto skip; + deleted = 0; WT_CKPT_FOREACH(ckptbase, ckpt) if (F_ISSET(ckpt, WT_CKPT_DELETE)) @@ -423,7 +418,6 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) ; WT_ERR(__wt_strdup(session, name, &ckpt->name)); F_SET(ckpt, WT_CKPT_ADD); - new = ckpt; /* * Lock the checkpoints that will be deleted. @@ -477,15 +471,14 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) } /* - * If we're forcing a checkpoint, notify the cache in case the file - * has no dirty pages. + * If the object is unmodified, notify the cache in case the file has + * no dirty pages. */ - if (force) + if (!__wt_tree_modify_isset(btree)) WT_ERR(__wt_bt_cache_force_write(session)); /* - * Clear the tree's modified value and ensure that write happens before - * we reconcile/write any pages. Any changes before we clear the flag + * Clear the tree's modified flag; any changes before we clear the flag * are guaranteed to be part of this checkpoint (unless reconciliation * skips updates for transactional reasons), and changes subsequent to * the checkpoint start, which might not be included, will re-set the @@ -493,36 +486,25 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * handled in the reconciliation code: if reconciliation skips updates, * it sets the modified flag itself. */ - WT_PUBLISH(btree->modified, 0); + __wt_tree_modify_clr(btree); /* Flush the file from the cache, creating the checkpoint. */ WT_ERR(__wt_bt_cache_flush(session, ckptbase, is_checkpoint ? WT_SYNC : WT_SYNC_DISCARD)); - /* If there was a checkpoint, update the metadata and resolve it. */ - if (new->raw.data == NULL) { - /* - * Our knowledge of whether or not the file has dirty pages - * isn't perfect, we only know if the file was ever modified. - * If we didn't really need a checkpoint, it's not a problem - * if one wasn't created. - */ - if (force) - WT_ERR_MSG(session, EINVAL, - "cache flush failed to create a checkpoint"); - } else { - WT_ERR(__wt_meta_ckptlist_set(session, btree->name, ckptbase)); - /* - * If tracking is enabled, defer making pages available until - * the end of the transaction. The exception is if the handle - * is being discarded: in that case, it will be gone by the - * time we try to apply or unroll the meta tracking event. - */ - if (WT_META_TRACKING(session) && is_checkpoint) - WT_ERR(__wt_meta_track_checkpoint(session)); - else - WT_ERR(__wt_bm_checkpoint_resolve(session)); - } + /* Update the object's metadata. */ + WT_ERR(__wt_meta_ckptlist_set(session, btree->name, ckptbase)); + + /* + * If tracking enabled, defer making pages available until transaction + * end. The exception is if the handle is being discarded, in which + * case the handle will be gone by the time we try to apply or unroll + * the meta tracking event. + */ + if (WT_META_TRACKING(session) && is_checkpoint) + WT_ERR(__wt_meta_track_checkpoint(session)); + else + WT_ERR(__wt_bm_checkpoint_resolve(session)); err: skip: __wt_meta_ckptlist_free(session, ckptbase); diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py index 4596ae8e971..a279ce937d9 100644 --- a/test/suite/test_checkpoint01.py +++ b/test/suite/test_checkpoint01.py @@ -328,6 +328,48 @@ class test_checkpoint_last_name(wttest.WiredTigerTestCase): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.checkpoint(conf), msg) +class test_checkpoint_empty(wttest.WiredTigerTestCase): + scenarios = [ + ('file', dict(uri='file:checkpoint')), + ('table', dict(uri='table:checkpoint')), + ] + + # Create an empty file, do one of 4 cases of checkpoint, then verify the + # checkpoints exist. The reason for the 4 cases is we must create all + # checkpoints an application can explicitly reference using a cursor, and + # I want to test when other types of checkpoints are created before the + # checkpoint we really need. + # Case 1: a named checkpoint + def test_checkpoint_empty_one(self): + self.session.create(self.uri, "key_format=S,value_format=S") + self.session.checkpoint('name=ckpt') + cursor = self.session.open_cursor(self.uri, None, "checkpoint=ckpt") + + # Case 2: an internal checkpoint + def test_checkpoint_empty_two(self): + self.session.create(self.uri, "key_format=S,value_format=S") + self.session.checkpoint() + cursor = self.session.open_cursor( + self.uri, None, "checkpoint=WiredTigerCheckpoint") + + # Case 3: a named checkpoint, then an internal checkpoint + def test_checkpoint_empty_three(self): + self.session.create(self.uri, "key_format=S,value_format=S") + self.session.checkpoint('name=ckpt') + self.session.checkpoint() + cursor = self.session.open_cursor(self.uri, None, "checkpoint=ckpt") + cursor = self.session.open_cursor( + self.uri, None, "checkpoint=WiredTigerCheckpoint") + + # Case 4: an internal checkpoint, then a named checkpoint + def test_checkpoint_empty_four(self): + self.session.create(self.uri, "key_format=S,value_format=S") + self.session.checkpoint() + self.session.checkpoint('name=ckpt') + cursor = self.session.open_cursor(self.uri, None, "checkpoint=ckpt") + cursor = self.session.open_cursor( + self.uri, None, "checkpoint=WiredTigerCheckpoint") + if __name__ == '__main__': wttest.run() |