summaryrefslogtreecommitdiff
path: root/src/txn/txn_ckpt.c
blob: d60a19386093edd98874df555b8055d63b89417c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
/*-
 * Copyright (c) 2008-2012 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

#include "wt_internal.h"

/*
 * __wt_txn_checkpoint --
 *	Checkpoint a database or a list of objects in the database.
 *
 *	If the "target" configuration lists objects, each one is checkpointed
 *	in turn through the schema worker.  Otherwise a single pass is made
 *	over the open files (unnamed checkpoint) or over the files in the
 *	metadata (named checkpoint).
 *
 *	The whole operation runs inside a snapshot-isolation transaction, with
 *	the connection's checkpoint lock held and metadata tracking turned on.
 *	All three are released on every exit path.
 *
 *	Returns 0 on success, otherwise the first error encountered.
 */
int
__wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG targetconf;
	WT_CONFIG_ITEM cval, k, v;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_TXN_GLOBAL *txn_global;
	int target_list, tracking;
	const char *txn_cfg[] = { "isolation=snapshot", NULL };

	target_list = tracking = 0;
	txn_global = &S2C(session)->txn_global;

	/* Only one checkpoint can be active at a time. */
	__wt_writelock(session, S2C(session)->ckpt_rwlock);
	WT_ERR(__wt_txn_begin(session, txn_cfg));

	/* Prevent eviction from evicting anything newer than this. */
	txn_global->ckpt_txnid = session->txn.snap_min;

	/* Remember that tracking is on so the cleanup code turns it off. */
	WT_ERR(__wt_meta_track_on(session));
	tracking = 1;

	/* Step through the list of targets and checkpoint each one. */
	cval.len = 0;
	WT_ERR(__wt_config_gets(session, cfg, "target", &cval));
	if (cval.len != 0) {
		WT_ERR(__wt_scr_alloc(session, 512, &tmp));
		WT_ERR(__wt_config_subinit(session, &targetconf, &cval));
		while ((ret = __wt_config_next(&targetconf, &k, &v)) == 0) {
			target_list = 1;

			/* Build a nul-terminated copy of the target name. */
			WT_ERR(__wt_buf_fmt(session, tmp, "%.*s",
			    (int)k.len, k.str));

			/* A target is a bare key, it must not carry a value. */
			if (v.len != 0)
				WT_ERR_MSG(session, EINVAL,
				    "invalid checkpoint target \"%s\": "
				    "URIs may require quoting",
				    (const char *)tmp->data);

			/*
			 * Hold the schema lock across the worker call only, so
			 * it is always released before any error is reported.
			 */
			__wt_spin_lock(session, &S2C(session)->schema_lock);
			ret = __wt_schema_worker(
			    session, tmp->data, __wt_checkpoint, cfg, 0);
			__wt_spin_unlock(session, &S2C(session)->schema_lock);

			if (ret != 0)
				WT_ERR_MSG(session, ret, "%s",
				    (const char *)tmp->data);
		}

		/*
		 * The loop only falls out with a non-zero return.  Running off
		 * the end of the list is expected; any other failure has to be
		 * reported.  Ignoring it would be harmful when no target has
		 * been seen yet: the code below would then checkpoint every
		 * file and overwrite the error.
		 */
		if (ret != WT_NOTFOUND)
			goto err;
		ret = 0;
	}

	if (!target_list) {
		/*
		 * Possible checkpoint name.  If checkpoints are named, we must
		 * checkpoint both open and closed files; if checkpoints are not
		 * named, we only checkpoint open files.
		 *
		 * XXX
		 * We don't optimize unnamed checkpoints of a list of targets,
		 * we open the targets and checkpoint them even if they are
		 * quiescent and don't need a checkpoint, believing applications
		 * unlikely to checkpoint a list of closed targets.
		 */
		cval.len = 0;
		WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
		WT_ERR(cval.len == 0 ?
		    __wt_conn_btree_apply(session, __wt_checkpoint, cfg) :
		    __wt_meta_btree_apply(session, __wt_checkpoint, cfg, 0));
	}

err:	/*
	 * XXX Rolling back the changes here is problematic.
	 *
	 * If we unroll here, we need a way to roll back changes to the avail
	 * list for each tree that was successfully synced before the error
	 * occurred.  Otherwise, the next time we try this operation, we will
	 * try to free an old checkpoint again.
	 *
	 * OTOH, if we commit the changes after a failure, we have partially
	 * overwritten the checkpoint, so what ends up on disk is not
	 * consistent.
	 */
	if (tracking)
		WT_TRET(__wt_meta_track_off(session, ret != 0));

	/* Clear the checkpoint transaction ID before ending the transaction. */
	txn_global->ckpt_txnid = WT_TXN_NONE;
	if (F_ISSET(&session->txn, TXN_RUNNING))
		WT_TRET(__wt_txn_release(session));
	__wt_rwunlock(session, S2C(session)->ckpt_rwlock);
	__wt_scr_free(&tmp);
	return (ret);
}

/*
 * __drop --
 *	Flag for deletion every checkpoint in the list whose name is exactly
 *	the first len bytes of name.  The name argument is compared by length
 *	and is not required to be nul-terminated.
 */
static void
__drop(WT_CKPT *ckptbase, const char *name, size_t len)
{
	WT_CKPT *entry;

	WT_CKPT_FOREACH(ckptbase, entry) {
		/* A different length can never be an exact match. */
		if (strlen(entry->name) != len)
			continue;
		if (strncmp(entry->name, name, len) != 0)
			continue;

		F_SET(entry, WT_CKPT_DELETE);
	}
}

/*
 * __drop_from --
 *	Flag for deletion the named checkpoint and every checkpoint that
 *	follows it in the list.
 */
static void
__drop_from(WT_CKPT *ckptbase, const char *name, size_t len)
{
	WT_CKPT *entry;
	int dropping;

	/*
	 * The name "all" is special: it selects every checkpoint, which is the
	 * same as starting to drop at the head of the list.
	 */
	dropping = (len == strlen("all") && strncmp(name, "all", len) == 0);

	/*
	 * Otherwise start at the first entry with a matching name.  If two
	 * checkpoints share the name, the earlier one wins and everything from
	 * there to the end of the list is flagged.
	 */
	WT_CKPT_FOREACH(ckptbase, entry) {
		if (!dropping &&
		    strlen(entry->name) == len &&
		    strncmp(entry->name, name, len) == 0)
			dropping = 1;

		if (dropping)
			F_SET(entry, WT_CKPT_DELETE);
	}
}

/*
 * __drop_to --
 *	Flag for deletion the named checkpoint and every checkpoint that
 *	precedes it in the list.  Nothing is flagged if the name is not found.
 */
static void
__drop_to(WT_CKPT *ckptbase, const char *name, size_t len)
{
	WT_CKPT *entry, *last_match;

	/*
	 * Find the last entry carrying the name: if the name appears more than
	 * once, the range to delete extends through the final occurrence.
	 */
	last_match = NULL;
	WT_CKPT_FOREACH(ckptbase, entry) {
		if (strlen(entry->name) != len)
			continue;
		if (strncmp(entry->name, name, len) == 0)
			last_match = entry;
	}

	/* Flag from the head of the list through the match, inclusive. */
	if (last_match != NULL) {
		WT_CKPT_FOREACH(ckptbase, entry) {
			F_SET(entry, WT_CKPT_DELETE);

			if (entry == last_match)
				break;
		}
	}
}

/*
 * __wt_checkpoint --
 *	Checkpoint a tree.
 *
 *	Operates on the tree referenced by session->btree.  The cfg argument
 *	may be NULL: in that case no "name" or "drop" configuration is read,
 *	the checkpoint gets the internal checkpoint name, and the cache is
 *	flushed with WT_SYNC_DISCARD rather than WT_SYNC.
 *
 *	Returns 0 on success, otherwise an error code.
 */
int
__wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_BTREE *btree;
	WT_CKPT *ckpt, *ckptbase, *deleted;
	WT_CONFIG dropconf;
	WT_CONFIG_ITEM cval, k, v;
	WT_DECL_RET;
	const char *name;
	char *name_alloc;
	int force, tracked;

	btree = session->btree;
	/*
	 * force: a checkpoint must be created even if the tree is clean.
	 * tracked: the checkpoint was handed to meta tracking, which changes
	 * how the checkpoint lock is treated in the cleanup code.
	 */
	force = tracked = 0;
	ckpt = ckptbase = NULL;
	name_alloc = NULL;

	/* Checkpoints are single-threaded. */
	__wt_writelock(session, btree->ckptlock);

	/*
	 * Get the list of checkpoints for this file.  If there's no reference,
	 * this file is dead.  Discard it from the cache without bothering to
	 * write any dirty pages.
	 *
	 * Every non-zero return leaves through the common cleanup code,
	 * including the result of the discard in the WT_NOTFOUND case.
	 */
	if ((ret =
	    __wt_meta_ckptlist_get(session, btree->name, &ckptbase)) != 0) {
		if (ret == WT_NOTFOUND)
			ret = __wt_bt_cache_flush(
			    session, NULL, WT_SYNC_DISCARD_NOWRITE, 0);
		goto err;
	}

	/*
	 * This may be a named checkpoint, check the configuration.  If it's a
	 * named checkpoint, set force, we have to create the checkpoint even if
	 * the tree is clean.
	 *
	 * The configured name is copied into name_alloc, which is freed in the
	 * cleanup code; the internal name is used directly.
	 */
	cval.len = 0;
	if (cfg != NULL)
		WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
	if (cval.len == 0)
		name = WT_INTERNAL_CHKPT;
	else {
		force = 1;
		WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
		name = name_alloc;
	}

	/*
	 * We may be dropping checkpoints, check the configuration.  If we're
	 * dropping checkpoints, set force, we have to create the checkpoint
	 * even if the tree is clean.
	 *
	 * Each entry in the drop list is one of:
	 *	a bare name	drop all checkpoints with that name
	 *	from=<name>	drop the named checkpoint and all after it
	 *	to=<name>	drop the named checkpoint and all before it
	 * Any other key that carries a value is rejected with EINVAL.
	 */
	if (cfg != NULL) {
		cval.len = 0;
		WT_ERR(__wt_config_gets(session, cfg, "drop", &cval));
		if (cval.len != 0) {
			WT_ERR(__wt_config_subinit(session, &dropconf, &cval));
			while ((ret =
			    __wt_config_next(&dropconf, &k, &v)) == 0) {
				force = 1;

				if (v.len == 0)
					__drop(ckptbase, k.str, k.len);
				else if (k.len == strlen("from") &&
				    strncmp(k.str, "from", k.len) == 0)
					__drop_from(ckptbase, v.str, v.len);
				else if (k.len == strlen("to") &&
				    strncmp(k.str, "to", k.len) == 0)
					__drop_to(ckptbase, v.str, v.len);
				else
					WT_ERR_MSG(session, EINVAL,
					    "unexpected value for checkpoint "
					    "key: %.*s",
					    (int)k.len, k.str);
			}
			/*
			 * The loop ends with a non-zero return.
			 * NOTE(review): judging by the macro name, WT_NOTFOUND
			 * (end of list) is accepted here and anything else is
			 * treated as an error -- confirm against the macro.
			 */
			WT_ERR_NOTFOUND_OK(ret);
		}
	}

	/* Discard checkpoints with the same name as the new checkpoint. */
	__drop(ckptbase, name, strlen(name));

	/*
	 * Add a new checkpoint entry at the end of the list.
	 *
	 * The empty loop leaves ckpt at the position where the iteration
	 * stops, and that slot is filled in as the new entry.
	 * NOTE(review): this assumes the list returned by
	 * __wt_meta_ckptlist_get has a usable slot there -- confirm.
	 */
	WT_CKPT_FOREACH(ckptbase, ckpt)
		;
	WT_ERR(__wt_strdup(session, name, &ckpt->name));
	F_SET(ckpt, WT_CKPT_ADD);

	/*
	 * Lock the checkpoints that will be deleted.
	 *
	 * Checkpoints are only locked when tracking is enabled, which covers
	 * sync and drop operations, but not close.  The reasoning is that
	 * there should be no access to a checkpoint during close, because any
	 * thread accessing a checkpoint will also have the current file handle
	 * open.
	 */
	if (WT_META_TRACKING(session))
		WT_CKPT_FOREACH(ckptbase, deleted)
			if (F_ISSET(deleted, WT_CKPT_DELETE))
				WT_ERR(__wt_session_lock_checkpoint(session,
				    deleted->name, WT_BTREE_EXCLUSIVE));

	/*
	 * Flush the file from the cache, creating the checkpoint.  With no
	 * configuration the flush also discards (WT_SYNC_DISCARD), otherwise
	 * it is a plain sync.
	 */
	WT_ERR(__wt_bt_cache_flush(
	    session, ckptbase, cfg == NULL ? WT_SYNC_DISCARD : WT_SYNC, force));

	/*
	 * If there was a checkpoint, update the metadata and resolve it.
	 *
	 * The new entry having no raw data after the flush means no checkpoint
	 * was created.  That is only an error if one was required.
	 */
	if (ckpt->raw.data == NULL) {
		if (force)
			WT_ERR_MSG(session, EINVAL,
			    "cache flush failed to create a checkpoint");
	} else {
		WT_ERR(__wt_meta_ckptlist_set(session, btree->name, ckptbase));
		/*
		 * If tracking is enabled, defer making pages available until
		 * the end of the transaction.  The exception is if the handle
		 * is being discarded: in that case, it will be gone by the
		 * time we try to apply or unroll the meta tracking event.
		 */
		if (WT_META_TRACKING(session) && cfg != NULL) {
			WT_ERR(__wt_meta_track_checkpoint(session));
			tracked = 1;
		} else
			WT_ERR(__wt_bm_checkpoint_resolve(session));
	}

	/*
	 * Common cleanup for success and failure.  The checkpoint lock is
	 * released here unless the checkpoint was handed to meta tracking.
	 * NOTE(review): in the tracked case the lock is presumably released
	 * when the tracked event is applied or unrolled -- confirm in the
	 * meta tracking code.
	 */
err:	__wt_meta_ckptlist_free(session, ckptbase);
	if (!tracked)
		__wt_rwunlock(session, btree->ckptlock);

	__wt_free(session, name_alloc);

	return (ret);
}