summaryrefslogtreecommitdiff
path: root/src/env/env_toc.c
blob: 46d132707b5217dd8559939599b875dbd6c2c9ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2008-2011 WiredTiger, Inc.
 *	All rights reserved.
 *
 * $Id$
 */

#include "wt_internal.h"

/*
 * __wt_env_toc --
 *	ENV.toc method.
 */
int
__wt_env_toc(ENV *env, WT_TOC **tocp)
{
	IENV *ienv;
	WT_TOC *toc;
	uint32_t slot;

	ienv = env->ienv;
	*tocp = NULL;

	/* Check to see if there's an available WT_TOC slot. */
	if (ienv->toc_cnt == env->toc_size - 1) {
		__wt_api_env_errx(env,
		    "WiredTiger only configured to support %d thread contexts",
		    env->toc_size);
		return (WT_ERROR);
	}

	/*
	 * The WT_TOC reference list is compact, the WT_TOC array is not.  Find
	 * the first empty WT_TOC slot.
	 */
	for (slot = 0, toc = ienv->toc_array; toc->env != NULL; ++toc, ++slot)
		;

	/* Clear previous contents of the WT_TOC entry, they get re-used. */
	memset(toc, 0, sizeof(WT_TOC));

	toc->env = env;
	toc->hazard = ienv->hazard + slot * env->hazard_size;

	WT_RET(__wt_mtx_alloc(env, "toc", 1, &toc->mtx));

	__wt_methods_wt_toc_lockout(toc);
	__wt_methods_wt_toc_init_transition(toc);

	/* Make the entry visible to the workQ. */
	ienv->toc[ienv->toc_cnt++] = toc;
	WT_MEMORY_FLUSH;

	*tocp = toc;
	return (0);
}

/*
 * __wt_wt_toc_close --
 *	WT_TOC.close method.
 */
int
__wt_wt_toc_close(WT_TOC *toc)
{
	ENV *env;
	IENV *ienv;
	WT_TOC **tp;
	WT_TOC_UPDATE *update;
	int ret;

	env = toc->env;
	ienv = env->ienv;
	ret = 0;

	WT_ENV_FCHK_RET(
	    env, "WT_TOC.close", toc->flags, WT_APIMASK_WT_TOC, ret);

	/*
	 * The "in" reference count is artificially incremented by 1 as
	 * long as an update buffer is referenced by the WT_TOC thread;
	 * we don't want them freed because a page was evicted and their
	 * count went to 0.  Decrement the reference count on the buffer
	 * as part of releasing it.  There's a similar reference count
	 * decrement when the WT_TOC structure is discarded.
	 *
	 * XXX
	 * There's a race here: if this code, or the WT_TOC structure
	 * close code, and the page discard code race, it's possible
	 * neither will realize the buffer is no longer needed and free
	 * it.  The fix is to involve the eviction or workQ threads:
	 * they may need a linked list of buffers they review to ensure
	 * it never happens.  I'm living with this now: it's unlikely
	 * and it's a memory leak if it ever happens.
	 */
	update = toc->update;
	if (update != NULL && --update->in == update->out)
		__wt_free(env, update, update->len);

	/* Discard DBT memory. */
	__wt_free(env, toc->key.data, toc->key.mem_size);
	__wt_free(env, toc->data.data, toc->data.mem_size);
	__wt_scr_free(toc);

	/* Unlock and destroy the thread's mutex. */
	if (toc->mtx != NULL) {
		__wt_unlock(env, toc->mtx);
		(void)__wt_mtx_destroy(env, toc->mtx);
	}

	/*
	 * Replace the WT_TOC reference we're closing with the last entry in
	 * the table, then clear the last entry.  As far as the walk of the
	 * workQ is concerned, it's OK if the WT_TOC appears twice, or if it
	 * doesn't appear at all, so these lines can race all they want.
	 */
	for (tp = ienv->toc; *tp != toc; ++tp)
		;
	--ienv->toc_cnt;
	*tp = ienv->toc[ienv->toc_cnt];
	ienv->toc[ienv->toc_cnt] = NULL;

	/* Make the WT_TOC array entry available for re-use. */
	toc->env = NULL;
	WT_MEMORY_FLUSH;

	return (ret);
}

/*
 * __wt_toc_api_set --
 *	Pair WT_TOC and DB handle, allocating the WT_TOC as necessary.
 */
int
__wt_toc_api_set(ENV *env, const char *name, DB *db, WT_TOC **tocp)
{
	WT_TOC *toc;

	/*
	 * We pass around WT_TOCs internally in the Btree, (rather than a DB),
	 * because the DB's are free-threaded, and the WT_TOCs are per-thread.
	 * Lots of the API calls don't require the application to allocate and
	 * manage the WT_TOC, which means we have to do it for them.
	 *
	 * WT_TOCs always reference a DB handle, and we do that here, as well.
	 */
	if ((toc = *tocp) == NULL) {
		WT_RET(env->toc(env, 0, tocp));
		toc = *tocp;
	}
	toc->db = db;
	toc->name = name;
	return (0);
}

/*
 * __wt_toc_api_clr --
 *	Clear the WT_TOC, freeing it if it was allocated by the library.
 */
int
__wt_toc_api_clr(WT_TOC *toc, const char *name, int islocal)
{
	/*
	 * The WT_TOC should hold no more hazard references; this is a
	 * diagnostic check, but it's cheap so we do it all the time.
	 */
	__wt_hazard_empty(toc, name);

	if (islocal)
		return (toc->close(toc, 0));

	toc->db = NULL;
	toc->name = NULL;
	return (0);
}

#ifdef HAVE_DIAGNOSTIC
static const char *__wt_toc_print_state(WT_TOC *);

int
__wt_toc_dump(ENV *env)
{
	IENV *ienv;
	WT_MBUF mb;
	WT_TOC *toc, **tp;
	WT_PAGE **hp;

	ienv = env->ienv;
	__wt_mb_init(env, &mb);

	__wt_mb_add(&mb, "%s\n", ienv->sep);
	for (tp = ienv->toc; (toc = *tp) != NULL; ++tp) {
		__wt_mb_add(&mb,
		    "toc: %p {\n\tworkq func: ", toc);
		if (toc->wq_func == NULL)
			__wt_mb_add(&mb, "none");
		else
			__wt_mb_add(&mb, "%p", toc->wq_func);

		__wt_mb_add(&mb, " state: %s", __wt_toc_print_state(toc));

		__wt_mb_add(&mb, "\n\thazard: ");
		for (hp = toc->hazard;
		    hp < toc->hazard + env->hazard_size; ++hp)
			__wt_mb_add(&mb, "%p ", *hp);

		__wt_mb_add(&mb, "\n}");
		if (toc->name != NULL)
			__wt_mb_add(&mb, " %s", toc->name);
		__wt_mb_write(&mb);
	}

	__wt_mb_discard(&mb);
	return (0);
}

/*
 * __wt_toc_print_state --
 *	Return the WT_TOC state as a string.
 */
static const char *
__wt_toc_print_state(WT_TOC *toc)
{
	switch (toc->wq_state) {
	case WT_WORKQ_READ:
		return ("read");
	case WT_WORKQ_READ_SCHED:
		return ("read scheduled");
	case WT_WORKQ_FUNC:
		return ("function");
	case WT_WORKQ_NONE:
		return ("none");
	}
	return ("unknown");
	/* NOTREACHED */
}
#endif