summaryrefslogtreecommitdiff
path: root/src/mp/mp_fput.c
blob: 06b30fd42b7fe7515d453713a2fad1d331ebd9e4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2015 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/log.h"
#include "dbinc/mp.h"

static int __memp_reset_lru __P((ENV *, REGINFO *));

/*
 * __memp_fput_pp --
 *	DB_MPOOLFILE->put pre/post processing.
 *
 * PUBLIC: int __memp_fput_pp
 * PUBLIC:     __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t));
 */
int
__memp_fput_pp(dbmfp, pgaddr, priority, flags)
	DB_MPOOLFILE *dbmfp;
	void *pgaddr;
	DB_CACHE_PRIORITY priority;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int ret, t_ret;

	env = dbmfp->env;

	if (flags != 0)
		return (__db_ferr(env, "DB_MPOOLFILE->put", 0));

	MPF_ILLEGAL_BEFORE_OPEN(dbmfp, "DB_MPOOLFILE->put");

	ENV_ENTER(env, ip);

	ret = __memp_fput(dbmfp, ip, pgaddr, priority);
	if (IS_ENV_REPLICATED(env) &&
	    (t_ret = __op_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __memp_fput --
 *	DB_MPOOLFILE->put. Release this reference to the page. If the reference
 * count drop to zero adjust the buffer's cache priority.
 *
 * PUBLIC: int __memp_fput __P((DB_MPOOLFILE *,
 * PUBLIC:      DB_THREAD_INFO *, void *, DB_CACHE_PRIORITY));
 */
int
__memp_fput(dbmfp, ip, pgaddr, priority)
	DB_MPOOLFILE *dbmfp;
	DB_THREAD_INFO *ip;
	void *pgaddr;
	DB_CACHE_PRIORITY priority;
{
	BH *bhp;
	DB_ENV *dbenv;
	DB_MPOOL *dbmp;
	DB_MPOOL_HASH *hp;
	ENV *env;
	MPOOL *c_mp;
	MPOOLFILE *mfp;
	PIN_LIST *list, *lp;
	REGINFO *infop, *reginfo;
	roff_t b_ref;
	int region;
	int adjust, pfactor, ret, t_ret;
	char buf[DB_THREADID_STRLEN];

	env = dbmfp->env;
	dbenv = env->dbenv;
	dbmp = env->mp_handle;
	mfp = dbmfp->mfp;
	bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
	ret = 0;

	/*
	 * If this is marked dummy, we are using it to unpin a buffer for
	 * another thread.
	 */
	if (F_ISSET(dbmfp, MP_DUMMY))
		goto unpin;

	/*
	 * If we're mapping the file, there's nothing to do.  Because we can
	 * stop mapping the file at any time, we have to check on each buffer
	 * to see if the address we gave the application was part of the map
	 * region.
	 */
	if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
	    (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
		return (0);

	DB_ASSERT(env, IS_RECOVERING(env) || bhp->pgno <= mfp->last_pgno ||
	    F_ISSET(bhp, BH_FREED) || !SH_CHAIN_SINGLETON(bhp, vc));
#ifdef DIAGNOSTIC
	/*
	 * Decrement the per-file pinned buffer count (mapped pages aren't
	 * counted).
	 */
	MPOOL_SYSTEM_LOCK(env);
	if (dbmfp->pinref == 0) {
		MPOOL_SYSTEM_UNLOCK(env);
		__db_errx(env, DB_STR_A("3011",
		    "%s: more pages returned than retrieved", "%s"),
		    __memp_fn(dbmfp));
		return (__env_panic(env, EACCES));
	}
	--dbmfp->pinref;
	MPOOL_SYSTEM_UNLOCK(env);
#endif

unpin:
	infop = &dbmp->reginfo[bhp->region];
	c_mp = infop->primary;
	hp = R_ADDR(infop, c_mp->htab);
	hp = &hp[bhp->bucket];

	/*
	 * Check for a reference count going to zero.  This can happen if the
	 * application returns a page twice.
	 */
	if (atomic_read(&bhp->ref) == 0) {
		__db_errx(env, DB_STR_A("3012",
		    "%s: page %lu: unpinned page returned", "%s %lu"),
		    __memp_fn(dbmfp), (u_long)bhp->pgno);
		DB_ASSERT(env, atomic_read(&bhp->ref) != 0);
		return (__env_panic(env, EACCES));
	}

	/* Note the activity so allocation won't decide to quit. */
	++c_mp->put_counter;

	if (ip != NULL) {
		reginfo = env->reginfo;
		list = R_ADDR(reginfo, ip->dbth_pinlist);
		region = (int)(infop - dbmp->reginfo);
		b_ref = R_OFFSET(infop, bhp);
		for (lp = list; lp < &list[ip->dbth_pinmax]; lp++)
			if (lp->b_ref == b_ref && lp->region == region)
				break;

		if (lp == &list[ip->dbth_pinmax]) {
			__db_errx(env, DB_STR_A("3013",
		    "__memp_fput: pinned buffer not found for thread %s",
			    "%s"), dbenv->thread_id_string(dbenv,
			    ip->dbth_pid, ip->dbth_tid, buf));
			return (__env_panic(env, EINVAL));
		}

		lp->b_ref = INVALID_ROFF;
		ip->dbth_pincount--;
	}

	/*
	 * Mark the file dirty.
	 */
	if (F_ISSET(bhp, BH_EXCLUSIVE) && F_ISSET(bhp, BH_DIRTY)) {
		DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0);
		mfp->file_written = 1;
	}

	/*
	 * If more than one reference to the page we're done.  Ignore the
	 * discard flags (for now) and leave the buffer's priority alone.
	 * We are doing this a little early as the remaining ref may or
	 * may not be a write behind.  If it is we set the priority
	 * here, if not it will get set again later.  We might race
	 * and miss setting the priority which would leave it wrong
	 * for a while.
	 */
	DB_ASSERT(env, atomic_read(&bhp->ref) != 0);
	if (atomic_dec(env, &bhp->ref) > 1 || (atomic_read(&bhp->ref) == 1 &&
	    !F_ISSET(bhp, BH_DIRTY))) {
		/*
		 * __memp_pgwrite only has a shared lock while it clears
		 * the BH_DIRTY bit. If we only have a shared latch then
		 * we can't touch the flags bits.
		 */
		if (F_ISSET(bhp, BH_EXCLUSIVE))
			F_CLR(bhp, BH_EXCLUSIVE);
		MUTEX_UNLOCK(env, bhp->mtx_buf);
		return (0);
	}

	/* The buffer should not be accessed again. */
#ifdef DIAG_MVCC
	MUTEX_LOCK(env, hp->mtx_hash);
	if (BH_REFCOUNT(bhp) == 0)
		MVCC_MPROTECT(bhp->buf, mfp->pagesize, 0);
	MUTEX_UNLOCK(env, hp->mtx_hash);
#endif

	/* Update priority values. */
	if (priority == DB_PRIORITY_VERY_LOW ||
	    mfp->priority == MPOOL_PRI_VERY_LOW)
		bhp->priority = 0;
	else {
		/*
		 * We don't lock the LRU priority or the pages field, if
		 * we get garbage (which won't happen on a 32-bit machine), it
		 * only means a buffer has the wrong priority.
		 */
		bhp->priority = c_mp->lru_priority;

		switch (priority) {
		default:
		case DB_PRIORITY_UNCHANGED:
			pfactor = mfp->priority;
			break;
		case DB_PRIORITY_VERY_LOW:
			pfactor = MPOOL_PRI_VERY_LOW;
			break;
		case DB_PRIORITY_LOW:
			pfactor = MPOOL_PRI_LOW;
			break;
		case DB_PRIORITY_DEFAULT:
			pfactor = MPOOL_PRI_DEFAULT;
			break;
		case DB_PRIORITY_HIGH:
			pfactor = MPOOL_PRI_HIGH;
			break;
		case DB_PRIORITY_VERY_HIGH:
			pfactor = MPOOL_PRI_VERY_HIGH;
			break;
		}

		adjust = 0;
		if (pfactor != 0)
			adjust = (int)c_mp->pages / pfactor;

		if (F_ISSET(bhp, BH_DIRTY))
			adjust += (int)c_mp->pages / MPOOL_PRI_DIRTY;

		if (adjust > 0) {
			if (MPOOL_LRU_REDZONE - bhp->priority >=
			    (u_int32_t)adjust)
				bhp->priority += adjust;
		} else if (adjust < 0)
			if (bhp->priority > (u_int32_t)-adjust)
				bhp->priority += adjust;
	}

	/*
	 * __memp_pgwrite only has a shared lock while it clears the
	 * BH_DIRTY bit. If we only have a shared latch then we can't
	 * touch the flags bits.
	 */
	if (F_ISSET(bhp, BH_EXCLUSIVE))
		F_CLR(bhp, BH_EXCLUSIVE);
	MUTEX_UNLOCK(env, bhp->mtx_buf);

	/*
	 * On every buffer put we update the cache lru priority and check
	 * for wraparound. The increment doesn't need to be atomic: occasional
	 * lost increments are okay; __memp_reset_lru handles race conditions.
	 */
	if (++c_mp->lru_priority >= MPOOL_LRU_REDZONE &&
	    (t_ret = __memp_reset_lru(env, infop)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __memp_reset_lru --
 *	Reset the cache LRU priority when it reaches the upper limit.
 */
static int
__memp_reset_lru(env, infop)
	ENV *env;
	REGINFO *infop;
{
	BH *bhp, *tbhp;
	DB_MPOOL_HASH *hp;
	MPOOL *c_mp;
	u_int32_t bucket;
	int reset;

	/*
	 * Update the priority so all future allocations will start at the
	 * bottom. Lock this cache region to ensure that exactly one thread
	 * will reset this cache's buffers.
	 */
	c_mp = infop->primary;
	MPOOL_REGION_LOCK(env, infop);
	reset = c_mp->lru_priority >= MPOOL_LRU_DECREMENT;
	if (reset) {
		c_mp->lru_priority -= MPOOL_LRU_DECREMENT;
		c_mp->lru_generation++;
	}
	MPOOL_REGION_UNLOCK(env, infop);

	if (!reset)
		return (0);

	/* Reduce the priority of every buffer in this cache region. */
	for (hp = R_ADDR(infop, c_mp->htab),
	    bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
		/*
		 * Skip empty buckets.
		 *
		 * We can check for empty buckets before locking as we
		 * only care if the pointer is zero or non-zero.
		 */
		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
			continue;

		MUTEX_LOCK(env, hp->mtx_hash);
		SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) {
			for (tbhp = bhp; tbhp != NULL;
			    tbhp = SH_CHAIN_PREV(tbhp, vc, __bh)) {
				if (tbhp->priority > MPOOL_LRU_DECREMENT)
					tbhp->priority -= MPOOL_LRU_DECREMENT;
				else
					tbhp->priority = 0;
			}
		}
		MUTEX_UNLOCK(env, hp->mtx_hash);
	}

	COMPQUIET(env, NULL);
	return (0);
}

/*
 * __memp_unpin_buffers --
 *	Unpin buffers pinned by a thread.
 *
 * PUBLIC: int __memp_unpin_buffers __P((ENV *, DB_THREAD_INFO *));
 */
int
__memp_unpin_buffers(env, ip)
	ENV *env;
	DB_THREAD_INFO *ip;
{
	BH *bhp;
	DB_MPOOL *dbmp;
	DB_MPOOLFILE dbmf;
	PIN_LIST *list, *lp;
	REGINFO *rinfop, *reginfo;
	int ret;

	memset(&dbmf, 0, sizeof(dbmf));
	dbmf.env = env;
	dbmf.flags = MP_DUMMY;
	dbmp = env->mp_handle;
	reginfo = env->reginfo;

	list = R_ADDR(reginfo, ip->dbth_pinlist);
	for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) {
		if (lp->b_ref == INVALID_ROFF)
			continue;
		rinfop = &dbmp->reginfo[lp->region];
		bhp = R_ADDR(rinfop, lp->b_ref);
		dbmf.mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
		if ((ret = __memp_fput(&dbmf, ip,
		    (u_int8_t *)bhp + SSZA(BH, buf),
		    DB_PRIORITY_UNCHANGED)) != 0)
			return (ret);
	}
	return (0);
}