summaryrefslogtreecommitdiff
path: root/bdb/mp/mp_fput.c
diff options
context:
space:
mode:
Diffstat (limited to 'bdb/mp/mp_fput.c')
-rw-r--r--bdb/mp/mp_fput.c196
1 files changed, 106 insertions, 90 deletions
diff --git a/bdb/mp/mp_fput.c b/bdb/mp/mp_fput.c
index be03b721f36..271e44a4ef8 100644
--- a/bdb/mp/mp_fput.c
+++ b/bdb/mp/mp_fput.c
@@ -1,13 +1,13 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: mp_fput.c,v 11.16 2000/11/30 00:58:41 ubell Exp $";
+static const char revid[] = "$Id: mp_fput.c,v 11.36 2002/08/09 19:04:11 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -15,43 +15,32 @@ static const char revid[] = "$Id: mp_fput.c,v 11.16 2000/11/30 00:58:41 ubell Ex
#endif
-#ifdef HAVE_RPC
-#include "db_server.h"
-#endif
-
#include "db_int.h"
-#include "db_shash.h"
-#include "mp.h"
-
-#ifdef HAVE_RPC
-#include "gen_client_ext.h"
-#include "rpc_client_ext.h"
-#endif
+#include "dbinc/db_shash.h"
+#include "dbinc/mp.h"
/*
- * memp_fput --
+ * __memp_fput --
* Mpool file put function.
+ *
+ * PUBLIC: int __memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t));
*/
int
-memp_fput(dbmfp, pgaddr, flags)
+__memp_fput(dbmfp, pgaddr, flags)
DB_MPOOLFILE *dbmfp;
void *pgaddr;
u_int32_t flags;
{
- BH *bhp;
+ BH *argbhp, *bhp, *prev;
DB_ENV *dbenv;
DB_MPOOL *dbmp;
- MPOOL *c_mp, *mp;
- int ret, wrote;
+ DB_MPOOL_HASH *hp;
+ MPOOL *c_mp;
+ u_int32_t n_cache;
+ int adjust, ret;
dbmp = dbmfp->dbmp;
dbenv = dbmp->dbenv;
- mp = dbmp->reginfo[0].primary;
-
-#ifdef HAVE_RPC
- if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
- return (__dbcl_memp_fput(dbmfp, pgaddr, flags));
-#endif
PANIC_CHECK(dbenv);
@@ -72,17 +61,6 @@ memp_fput(dbmfp, pgaddr, flags)
}
}
- R_LOCK(dbenv, dbmp->reginfo);
-
- /* Decrement the pinned reference count. */
- if (dbmfp->pinref == 0) {
- __db_err(dbenv,
- "%s: more pages returned than retrieved", __memp_fn(dbmfp));
- R_UNLOCK(dbenv, dbmp->reginfo);
- return (EINVAL);
- } else
- --dbmfp->pinref;
-
/*
* If we're mapping the file, there's nothing to do. Because we can
* stop mapping the file at any time, we have to check on each buffer
@@ -90,97 +68,135 @@ memp_fput(dbmfp, pgaddr, flags)
* region.
*/
if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
- (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len) {
- R_UNLOCK(dbenv, dbmp->reginfo);
+ (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
return (0);
+
+#ifdef DIAGNOSTIC
+ /*
+ * Decrement the per-file pinned buffer count (mapped pages aren't
+ * counted).
+ */
+ R_LOCK(dbenv, dbmp->reginfo);
+ if (dbmfp->pinref == 0) {
+ ret = EINVAL;
+ __db_err(dbenv,
+ "%s: more pages returned than retrieved", __memp_fn(dbmfp));
+ } else {
+ ret = 0;
+ --dbmfp->pinref;
}
+ R_UNLOCK(dbenv, dbmp->reginfo);
+ if (ret != 0)
+ return (ret);
+#endif
- /* Convert the page address to a buffer header. */
+ /* Convert a page address to a buffer header and hash bucket. */
bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
+ n_cache = NCACHE(dbmp->reginfo[0].primary, bhp->mf_offset, bhp->pgno);
+ c_mp = dbmp->reginfo[n_cache].primary;
+ hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
+ hp = &hp[NBUCKET(c_mp, bhp->mf_offset, bhp->pgno)];
- /* Convert the buffer header to a cache. */
- c_mp = BH_TO_CACHE(dbmp, bhp);
-
-/* UNLOCK THE REGION, LOCK THE CACHE. */
+ MUTEX_LOCK(dbenv, &hp->hash_mutex);
/* Set/clear the page bits. */
- if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) {
- ++c_mp->stat.st_page_clean;
- --c_mp->stat.st_page_dirty;
+ if (LF_ISSET(DB_MPOOL_CLEAN) &&
+ F_ISSET(bhp, BH_DIRTY) && !F_ISSET(bhp, BH_DIRTY_CREATE)) {
+ DB_ASSERT(hp->hash_page_dirty != 0);
+ --hp->hash_page_dirty;
F_CLR(bhp, BH_DIRTY);
}
if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
- --c_mp->stat.st_page_clean;
- ++c_mp->stat.st_page_dirty;
+ ++hp->hash_page_dirty;
F_SET(bhp, BH_DIRTY);
}
if (LF_ISSET(DB_MPOOL_DISCARD))
F_SET(bhp, BH_DISCARD);
/*
- * If the page is dirty and being scheduled to be written as part of
- * a checkpoint, we no longer know that the log is up-to-date.
- */
- if (F_ISSET(bhp, BH_DIRTY) && F_ISSET(bhp, BH_SYNC))
- F_SET(bhp, BH_SYNC_LOGFLSH);
-
- /*
* Check for a reference count going to zero. This can happen if the
* application returns a page twice.
*/
if (bhp->ref == 0) {
__db_err(dbenv, "%s: page %lu: unpinned page returned",
__memp_fn(dbmfp), (u_long)bhp->pgno);
- R_UNLOCK(dbenv, dbmp->reginfo);
+ MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
return (EINVAL);
}
/*
- * If more than one reference to the page, we're done. Ignore the
- * discard flags (for now) and leave it at its position in the LRU
- * chain. The rest gets done at last reference close.
+ * If more than one reference to the page or a reference other than a
+ * thread waiting to flush the buffer to disk, we're done. Ignore the
+ * discard flags (for now) and leave the buffer's priority alone.
*/
- if (--bhp->ref > 0) {
- R_UNLOCK(dbenv, dbmp->reginfo);
+ if (--bhp->ref > 1 || (bhp->ref == 1 && !F_ISSET(bhp, BH_LOCKED))) {
+ MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
return (0);
}
+ /* Update priority values. */
+ if (F_ISSET(bhp, BH_DISCARD) ||
+ dbmfp->mfp->priority == MPOOL_PRI_VERY_LOW)
+ bhp->priority = 0;
+ else {
+ /*
+ * We don't lock the LRU counter or the stat.st_pages field, if
+ * we get garbage (which won't happen on a 32-bit machine), it
+ * only means a buffer has the wrong priority.
+ */
+ bhp->priority = c_mp->lru_count;
+
+ adjust = 0;
+ if (dbmfp->mfp->priority != 0)
+ adjust =
+ (int)c_mp->stat.st_pages / dbmfp->mfp->priority;
+ if (F_ISSET(bhp, BH_DIRTY))
+ adjust += c_mp->stat.st_pages / MPOOL_PRI_DIRTY;
+
+ if (adjust > 0) {
+ if (UINT32_T_MAX - bhp->priority <= (u_int32_t)adjust)
+ bhp->priority += adjust;
+ } else if (adjust < 0)
+ if (bhp->priority > (u_int32_t)-adjust)
+ bhp->priority += adjust;
+ }
+
/*
- * Move the buffer to the head/tail of the LRU chain. We do this
- * before writing the buffer for checkpoint purposes, as the write
- * can discard the region lock and allow another process to acquire
- * buffer. We could keep that from happening, but there seems no
- * reason to do so.
+ * Buffers on hash buckets are sorted by priority -- move the buffer
+ * to the correct position in the list.
*/
- SH_TAILQ_REMOVE(&c_mp->bhq, bhp, q, __bh);
- if (F_ISSET(bhp, BH_DISCARD))
- SH_TAILQ_INSERT_HEAD(&c_mp->bhq, bhp, q, __bh);
+ argbhp = bhp;
+ SH_TAILQ_REMOVE(&hp->hash_bucket, argbhp, hq, __bh);
+
+ prev = NULL;
+ for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
+ bhp != NULL; prev = bhp, bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
+ if (bhp->priority > argbhp->priority)
+ break;
+ if (prev == NULL)
+ SH_TAILQ_INSERT_HEAD(&hp->hash_bucket, argbhp, hq, __bh);
else
- SH_TAILQ_INSERT_TAIL(&c_mp->bhq, bhp, q);
+ SH_TAILQ_INSERT_AFTER(&hp->hash_bucket, prev, argbhp, hq, __bh);
+
+ /* Reset the hash bucket's priority. */
+ hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
+
+#ifdef DIAGNOSTIC
+ __memp_check_order(hp);
+#endif
/*
- * If this buffer is scheduled for writing because of a checkpoint, we
- * need to write it (if it's dirty), or update the checkpoint counters
- * (if it's not dirty). If we try to write it and can't, that's not
- * necessarily an error as it's not completely unreasonable that the
- * application have permission to write the underlying file, but set a
- * flag so that the next time the memp_sync function is called we try
- * writing it there, as the checkpoint thread of control better be able
- * to write all of the files.
+ * The sync code has a separate counter for buffers on which it waits.
+ * It reads that value without holding a lock so we update it as the
+ * last thing we do. Once that value goes to 0, we won't see another
+ * reference to that buffer being returned to the cache until the sync
+ * code has finished, so we're safe as long as we don't let the value
+ * go to 0 before we finish with the buffer.
*/
- if (F_ISSET(bhp, BH_SYNC)) {
- if (F_ISSET(bhp, BH_DIRTY)) {
- if (__memp_bhwrite(dbmp,
- dbmfp->mfp, bhp, NULL, &wrote) != 0 || !wrote)
- F_SET(mp, MP_LSN_RETRY);
- } else {
- F_CLR(bhp, BH_SYNC);
-
- --mp->lsn_cnt;
- --dbmfp->mfp->lsn_cnt;
- }
- }
+ if (F_ISSET(argbhp, BH_LOCKED) && argbhp->ref_sync != 0)
+ --argbhp->ref_sync;
+
+ MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- R_UNLOCK(dbenv, dbmp->reginfo);
return (0);
}