summary refs log tree commit diff
path: root/bdb/mp
diff options
context:
space:
mode:
Diffstat (limited to 'bdb/mp')
-rw-r--r-- bdb/mp/mp_alloc.c 442
-rw-r--r-- bdb/mp/mp_bh.c 646
-rw-r--r-- bdb/mp/mp_fget.c 654
-rw-r--r-- bdb/mp/mp_fopen.c 1018
-rw-r--r-- bdb/mp/mp_fput.c 202
-rw-r--r-- bdb/mp/mp_fset.c 89
-rw-r--r-- bdb/mp/mp_method.c 156
-rw-r--r-- bdb/mp/mp_region.c 466
-rw-r--r-- bdb/mp/mp_register.c 76
-rw-r--r-- bdb/mp/mp_stat.c 491
-rw-r--r-- bdb/mp/mp_sync.c 627
-rw-r--r-- bdb/mp/mp_trickle.c 83
12 files changed, 0 insertions, 4950 deletions
diff --git a/bdb/mp/mp_alloc.c b/bdb/mp/mp_alloc.c
deleted file mode 100644
index 96dd612d7ba..00000000000
--- a/bdb/mp/mp_alloc.c
+++ /dev/null
@@ -1,442 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_alloc.c,v 11.31 2002/08/14 17:21:37 ubell Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-typedef struct { /* Pairs a hash bucket pointer with a priority value. */
- DB_MPOOL_HASH *bucket;
- u_int32_t priority;
-} HS; /* NOTE(review): no use of HS is visible in mp_alloc.c as shown -- confirm it is still needed. */
-
-static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
-static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
-
-/*
- * __memp_alloc --
- * Allocate some space from a cache region.
- *
- * dbmp: pool handle; supplies dbenv and the reginfo used to locate the
- * MPOOLFILE of each eviction candidate.
- * memreg: region to allocate from; memreg->primary is the MPOOL whose
- * hash table is scanned for buffers to evict.
- * mfp: if non-NULL, len is replaced by (sizeof(BH) - sizeof(u_int8_t))
- * plus mfp's page size, st_pages is incremented on a fresh allocation,
- * and an evicted buffer with the same page size is reused directly
- * instead of being freed.
- * len: number of bytes wanted (overridden when mfp is non-NULL).
- * offsetp: if non-NULL, receives the region offset of the allocation.
- * retp: treated as a void ** and receives the address of the allocation.
- *
- * Returns 0. There is no failure return in this function: it keeps
- * evicting buffers and retrying until the allocation succeeds.
- *
- * Locking: acquires the region lock itself and returns with it released.
- *
- * PUBLIC: int __memp_alloc __P((DB_MPOOL *,
- * PUBLIC: REGINFO *, MPOOLFILE *, size_t, roff_t *, void *));
- */
-int
-__memp_alloc(dbmp, memreg, mfp, len, offsetp, retp)
- DB_MPOOL *dbmp;
- REGINFO *memreg;
- MPOOLFILE *mfp;
- size_t len;
- roff_t *offsetp;
- void *retp;
-{
- BH *bhp;
- DB_ENV *dbenv;
- DB_MPOOL_HASH *dbht, *hp, *hp_end, *hp_tmp;
- DB_MUTEX *mutexp;
- MPOOL *c_mp;
- MPOOLFILE *bh_mfp;
- size_t freed_space;
- u_int32_t buckets, buffers, high_priority, max_na, priority;
- int aggressive, ret;
- void *p;
-
- dbenv = dbmp->dbenv;
- c_mp = memreg->primary;
- dbht = R_ADDR(memreg, c_mp->htab);
- hp_end = &dbht[c_mp->htab_buckets];
-
- buckets = buffers = 0;
- aggressive = 0;
-
- c_mp->stat.st_alloc++;
-
- /*
- * Get aggressive once we've examined 5 times as many non-empty hash
- * buckets as there are buckets in the cache without finding space
- * (see the test on "buckets" in the scan loop below).
- */
- max_na = 5 * c_mp->htab_buckets;
-
- /*
- * If we're allocating a buffer, and the one we're discarding is the
- * same size, we don't want to waste the time to re-integrate it into
- * the shared memory free list. If the DB_MPOOLFILE argument isn't
- * NULL, we'll compare the underlying page sizes of the two buffers
- * before free-ing and re-allocating buffers.
- */
- if (mfp != NULL)
- len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
-
- R_LOCK(dbenv, memreg);
-
- /*
- * On every buffer allocation we update the buffer generation number
- * and check for wraparound.
- */
- if (++c_mp->lru_count == UINT32_T_MAX)
- __memp_reset_lru(dbenv, memreg, c_mp);
-
- /*
- * Anything newer than 1/10th of the buffer pool is ignored during
- * allocation (unless allocation starts failing).
- */
- DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
- high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
-
- /*
- * First we try to allocate from free memory. If that fails, scan the
- * buffer pool to find buffers with low priorities. We consider small
- * sets of hash buckets each time to limit the amount of work needing
- * to be done. This approximates LRU, but not very well. We either
- * find a buffer of the same size to use, or we will free 3 times what
- * we need in the hopes it will coalesce into a contiguous chunk of the
- * right size. In the latter case we branch back here and try again.
- */
-alloc: if ((ret = __db_shalloc(memreg->addr, len, MUTEX_ALIGN, &p)) == 0) {
- if (mfp != NULL)
- c_mp->stat.st_pages++;
- R_UNLOCK(dbenv, memreg);
-
-found: if (offsetp != NULL)
- *offsetp = R_OFFSET(memreg, p);
- *(void **)retp = p;
-
- /*
- * Update the search statistics.
- *
- * We're not holding the region locked here, these statistics
- * can't be trusted.
- */
- if (buckets != 0) {
- if (buckets > c_mp->stat.st_alloc_max_buckets)
- c_mp->stat.st_alloc_max_buckets = buckets;
- c_mp->stat.st_alloc_buckets += buckets;
- }
- if (buffers != 0) {
- if (buffers > c_mp->stat.st_alloc_max_pages)
- c_mp->stat.st_alloc_max_pages = buffers;
- c_mp->stat.st_alloc_pages += buffers;
- }
- return (0);
- }
-
- /*
- * We re-attempt the allocation every time we've freed 3 times what
- * we need. Reset our free-space counter.
- */
- freed_space = 0;
-
- /*
- * Walk the hash buckets and find the next two with potentially useful
- * buffers. Free the buffer with the lowest priority from the buckets'
- * chains.
- */
- for (hp_tmp = NULL;;) {
- /* Check for wrap around. */
- hp = &dbht[c_mp->last_checked++];
- if (hp >= hp_end) {
- c_mp->last_checked = 0;
-
- /*
- * If we've gone through all of the hash buckets, try
- * an allocation. If the cache is small, the old page
- * size is small, and the new page size is large, we
- * might have freed enough memory (but not 3 times the
- * memory).
- */
- goto alloc;
- }
-
- /*
- * Skip empty buckets.
- *
- * We can check for empty buckets before locking as we
- * only care if the pointer is zero or non-zero.
- */
- if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
- continue;
-
- /*
- * The failure mode is when there are too many buffers we can't
- * write or there's not enough memory in the system. We don't
- * have a metric for deciding if allocation has no possible way
- * to succeed, so we don't ever fail, we assume memory will be
- * available if we wait long enough.
- *
- * Get aggressive if we've tried to flush 5 times the number of
- * hash buckets as are in the system -- it's possible we have
- * been repeatedly trying to flush the same buffers, although
- * it's unlikely. Aggressive means:
- *
- * a: set a flag to attempt to flush high priority buffers as
- * well as other buffers.
- * b: sync the mpool to force out queue extent pages. While we
- * might not have enough space for what we want and flushing
- * is expensive, why not?
- * c: sleep for a second -- hopefully someone else will run and
- * free up some memory. Try to allocate memory too, in case
- * the other thread returns its memory to the region.
- * d: look at a buffer in every hash bucket rather than choose
- * the more preferable of two.
- *
- * !!!
- * This test ignores pathological cases like no buffers in the
- * system -- that shouldn't be possible.
- */
- if ((++buckets % max_na) == 0) {
- aggressive = 1;
-
- R_UNLOCK(dbenv, memreg);
-
- (void)__memp_sync_int(
- dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
-
- (void)__os_sleep(dbenv, 1, 0);
-
- R_LOCK(dbenv, memreg);
- goto alloc;
- }
-
- if (!aggressive) {
- /* Skip high priority buckets. */
- if (hp->hash_priority > high_priority)
- continue;
-
- /*
- * Find two buckets and select the one with the lowest
- * priority. Performance testing shows that looking
- * at two improves the LRUness and looking at more only
- * does a little better.
- */
- if (hp_tmp == NULL) {
- hp_tmp = hp;
- continue;
- }
- if (hp->hash_priority > hp_tmp->hash_priority)
- hp = hp_tmp;
- hp_tmp = NULL;
- }
-
- /* Remember the priority of the buffer we're looking for. */
- priority = hp->hash_priority;
-
- /* Unlock the region and lock the hash bucket. */
- R_UNLOCK(dbenv, memreg);
- mutexp = &hp->hash_mutex;
- MUTEX_LOCK(dbenv, mutexp);
-
-#ifdef DIAGNOSTIC
- __memp_check_order(hp);
-#endif
- /*
- * The lowest priority page is first in the bucket, as they are
- * maintained in sorted order.
- *
- * The buffer may have been freed or its priority changed while
- * we switched from the region lock to the hash lock. If so,
- * we have to restart. We will still take the first buffer on
- * the bucket's list, though, if it has a low enough priority.
- */
- if ((bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) == NULL ||
- bhp->ref != 0 || bhp->priority > priority)
- goto next_hb;
-
- buffers++;
-
- /* Find the associated MPOOLFILE. */
- bh_mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
-
- /* If the page is dirty, pin it and write it. */
- ret = 0;
- if (F_ISSET(bhp, BH_DIRTY)) {
- ++bhp->ref;
- ret = __memp_bhwrite(dbmp, hp, bh_mfp, bhp, 0);
- --bhp->ref;
- if (ret == 0)
- ++c_mp->stat.st_rw_evict;
- } else
- ++c_mp->stat.st_ro_evict;
-
- /*
- * If a write fails for any reason, we can't proceed.
- *
- * We released the hash bucket lock while doing I/O, so another
- * thread may have acquired this buffer and incremented the ref
- * count after we wrote it, in which case we can't have it.
- *
- * If there's a write error, avoid selecting this buffer again
- * by making it the bucket's least-desirable buffer.
- */
- if (ret != 0 || bhp->ref != 0) {
- if (ret != 0 && aggressive)
- __memp_bad_buffer(hp);
- goto next_hb;
- }
-
- /*
- * Check to see if the buffer is the size we're looking for.
- * If so, we can simply reuse it. Else, free the buffer and
- * its space and keep looking.
- */
- if (mfp != NULL &&
- mfp->stat.st_pagesize == bh_mfp->stat.st_pagesize) {
- __memp_bhfree(dbmp, hp, bhp, 0);
-
- p = bhp;
- goto found;
- }
-
- freed_space += __db_shsizeof(bhp);
- __memp_bhfree(dbmp, hp, bhp, 1);
-
- /*
- * Unlock this hash bucket and re-acquire the region lock. If
- * we're reaching here as a result of calling memp_bhfree, the
- * hash bucket lock has already been discarded.
- */
- if (0) {
-next_hb: MUTEX_UNLOCK(dbenv, mutexp);
- }
- R_LOCK(dbenv, memreg);
-
- /*
- * Retry the allocation as soon as we've freed up sufficient
- * space. We're likely to have to coalesce memory to
- * satisfy the request, don't try until it's likely (possible?)
- * we'll succeed.
- */
- if (freed_space >= 3 * len)
- goto alloc;
- }
- /* NOTREACHED */
-}
-
-/*
- * __memp_bad_buffer --
- * Make the first buffer in a hash bucket the least desirable buffer.
- *
- * hp: the hash bucket to reorder. The bucket must not be empty: the
- * first buffer is removed and dereferenced without a NULL check.
- *
- * The first buffer is moved to the tail of the bucket and given the
- * priority of the buffer that was previously last (its priority is left
- * unchanged if it was the only buffer). hp->hash_priority is then reset
- * to the priority of the new first buffer.
- */
-static void
-__memp_bad_buffer(hp)
- DB_MPOOL_HASH *hp;
-{
- BH *bhp, *t_bhp;
- u_int32_t priority;
-
- /* Remove the first buffer from the bucket. */
- bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
- SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
-
- /*
- * Find the highest priority buffer in the bucket. Buffers are
- * sorted by priority, so it's the last one in the bucket.
- *
- * XXX
- * Should use SH_TAILQ_LAST, but I think that macro is broken.
- */
- priority = bhp->priority;
- for (t_bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
- t_bhp != NULL; t_bhp = SH_TAILQ_NEXT(t_bhp, hq, __bh))
- priority = t_bhp->priority;
-
- /*
- * Set our buffer's priority to be just as bad, and append it to
- * the bucket.
- */
- bhp->priority = priority;
- SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
-
- /* Reset the hash bucket's priority. */
- hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
-}
-
-/*
- * __memp_reset_lru --
- * Reset the cache LRU counter.
- *
- * Called from __memp_alloc with the region lock held. The lock is
- * dropped while the hash buckets are walked (each non-empty bucket is
- * locked individually) and is reacquired before returning.
- *
- * Subtracts MPOOL_BASE_DECREMENT from c_mp->lru_count and from the
- * priority of every buffer whose priority is greater than
- * MPOOL_BASE_DECREMENT and is not UINT32_T_MAX.
- *
- * NOTE(review): hp->hash_priority is not adjusted here, although
- * __memp_check_order asserts that it equals the first buffer's
- * priority; confirm whether the bucket priority should be updated too.
- */
-static void
-__memp_reset_lru(dbenv, memreg, c_mp)
- DB_ENV *dbenv;
- REGINFO *memreg;
- MPOOL *c_mp;
-{
- BH *bhp;
- DB_MPOOL_HASH *hp;
- int bucket;
-
- /*
- * Update the counter so all future allocations will start at the
- * bottom.
- */
- c_mp->lru_count -= MPOOL_BASE_DECREMENT;
-
- /* Release the region lock. */
- R_UNLOCK(dbenv, memreg);
-
- /* Adjust the priority of every buffer in the system. */
- for (hp = R_ADDR(memreg, c_mp->htab),
- bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
- /*
- * Skip empty buckets.
- *
- * We can check for empty buckets before locking as we
- * only care if the pointer is zero or non-zero.
- */
- if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
- continue;
-
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
- for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
- if (bhp->priority != UINT32_T_MAX &&
- bhp->priority > MPOOL_BASE_DECREMENT)
- bhp->priority -= MPOOL_BASE_DECREMENT;
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- }
-
- /* Reacquire the region lock. */
- R_LOCK(dbenv, memreg);
-}
-
-#ifdef DIAGNOSTIC
-/*
- * __memp_check_order --
- * Verify the priority ordering of a hash bucket chain.
- *
- * Asserts that hp->hash_priority equals the priority of the first
- * buffer and that priorities never decrease along the chain. Does
- * nothing for an empty bucket. Only compiled when DIAGNOSTIC is defined.
- *
- * PUBLIC: #ifdef DIAGNOSTIC
- * PUBLIC: void __memp_check_order __P((DB_MPOOL_HASH *));
- * PUBLIC: #endif
- */
-void
-__memp_check_order(hp)
- DB_MPOOL_HASH *hp;
-{
- BH *bhp;
- u_int32_t priority;
-
- /*
- * Assumes the hash bucket is locked.
- */
- if ((bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) == NULL)
- return;
-
- DB_ASSERT(bhp->priority == hp->hash_priority);
-
- for (priority = bhp->priority;
- (bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) != NULL;
- priority = bhp->priority)
- DB_ASSERT(priority <= bhp->priority);
-}
-#endif
diff --git a/bdb/mp/mp_bh.c b/bdb/mp/mp_bh.c
deleted file mode 100644
index 85d15218abf..00000000000
--- a/bdb/mp/mp_bh.c
+++ /dev/null
@@ -1,646 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_bh.c,v 11.71 2002/09/04 19:06:45 margo Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <string.h>
-#include <unistd.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-#include "dbinc/log.h"
-#include "dbinc/db_page.h"
-
-static int __memp_pgwrite
- __P((DB_MPOOL *, DB_MPOOLFILE *, DB_MPOOL_HASH *, BH *));
-static int __memp_upgrade __P((DB_MPOOL *, DB_MPOOLFILE *, MPOOLFILE *));
-
-/*
- * __memp_bhwrite --
- * Write the page associated with a given buffer header.
- *
- * dbmp: pool handle; its dbmfq list is searched for a per-process handle
- * on the file.
- * hp: hash bucket holding bhp; passed through to __memp_pgwrite.
- * mfp: shared MPOOLFILE structure of the file the buffer belongs to.
- * bhp: buffer header of the page to write.
- * open_extents: if zero, a file marked MP_EXTENT for which this process
- * has no handle is not opened and EPERM is returned instead.
- *
- * Returns the result of __memp_pgwrite when a handle is available (or
- * the file is marked MP_DEADFILE). Returns EPERM when the only handle is
- * read-only and cannot be upgraded, when the file is an extent the
- * caller did not ask to open, when it is a temporary file, or when no
- * pgin/pgout registration exists for its ftype. Errors from
- * memp_fcreate and __memp_fopen_int are returned unchanged.
- *
- * PUBLIC: int __memp_bhwrite __P((DB_MPOOL *,
- * PUBLIC: DB_MPOOL_HASH *, MPOOLFILE *, BH *, int));
- */
-int
-__memp_bhwrite(dbmp, hp, mfp, bhp, open_extents)
- DB_MPOOL *dbmp;
- DB_MPOOL_HASH *hp;
- MPOOLFILE *mfp;
- BH *bhp;
- int open_extents;
-{
- DB_ENV *dbenv;
- DB_MPOOLFILE *dbmfp;
- DB_MPREG *mpreg;
- int local_open, incremented, ret;
-
- dbenv = dbmp->dbenv;
- local_open = incremented = 0;
-
- /*
- * If the file has been removed or is a closed temporary file, jump
- * right ahead and pretend that we've found the file we want -- the
- * page-write function knows how to handle the fact that we don't have
- * (or need!) any real file descriptor information.
- */
- if (F_ISSET(mfp, MP_DEADFILE)) {
- dbmfp = NULL;
- goto found;
- }
-
- /*
- * Walk the process' DB_MPOOLFILE list and find a file descriptor for
- * the file. We also check that the descriptor is open for writing.
- * If we find a descriptor on the file that's not open for writing, we
- * try and upgrade it to make it writeable. If that fails, we're done.
- */
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
- for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
- dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
- if (dbmfp->mfp == mfp) {
- if (F_ISSET(dbmfp, MP_READONLY) &&
- !F_ISSET(dbmfp, MP_UPGRADE) &&
- (F_ISSET(dbmfp, MP_UPGRADE_FAIL) ||
- __memp_upgrade(dbmp, dbmfp, mfp))) {
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
- return (EPERM);
- }
-
- /*
- * Increment the reference count -- see the comment in
- * __memp_fclose_int().
- */
- ++dbmfp->ref;
- incremented = 1;
- break;
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-
- if (dbmfp != NULL)
- goto found;
-
- /*
- * !!!
- * It's the caller's choice if we're going to open extent files.
- */
- if (!open_extents && F_ISSET(mfp, MP_EXTENT))
- return (EPERM);
-
- /*
- * !!!
- * Don't try to attach to temporary files. There are two problems in
- * trying to do that. First, if we have different privileges than the
- * process that "owns" the temporary file, we might create the backing
- * disk file such that the owning process couldn't read/write its own
- * buffers, e.g., memp_trickle running as root creating a file owned
- * as root, mode 600. Second, if the temporary file has already been
- * created, we don't have any way of finding out what its real name is,
- * and, even if we did, it was already unlinked (so that it won't be
- * left if the process dies horribly). This decision causes a problem,
- * however: if the temporary file consumes the entire buffer cache,
- * and the owner doesn't flush the buffers to disk, we could end up
- * with resource starvation, and the memp_trickle thread couldn't do
- * anything about it. That's a pretty unlikely scenario, though.
- *
- * Note we should never get here when the temporary file in question
- * has already been closed in another process, in which case it should
- * be marked MP_DEADFILE.
- */
- if (F_ISSET(mfp, MP_TEMP))
- return (EPERM);
-
- /*
- * It's not a page from a file we've opened. If the file requires
- * input/output processing, see if this process has ever registered
- * information as to how to write this type of file. If not, there's
- * nothing we can do.
- */
- if (mfp->ftype != 0) {
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
- for (mpreg = LIST_FIRST(&dbmp->dbregq);
- mpreg != NULL; mpreg = LIST_NEXT(mpreg, q))
- if (mpreg->ftype == mfp->ftype)
- break;
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
- if (mpreg == NULL)
- return (EPERM);
- }
-
- /*
- * Try and open the file, attaching to the underlying shared area.
- * Ignore any error, assume it's a permissions problem.
- *
- * XXX
- * There's no negative cache, so we may repeatedly try and open files
- * that we have previously tried (and failed) to open.
- */
- if ((ret = dbenv->memp_fcreate(dbenv, &dbmfp, 0)) != 0)
- return (ret);
- if ((ret = __memp_fopen_int(dbmfp, mfp,
- R_ADDR(dbmp->reginfo, mfp->path_off),
- 0, 0, mfp->stat.st_pagesize)) != 0) {
- (void)dbmfp->close(dbmfp, 0);
- return (ret);
- }
- local_open = 1;
-
-found: ret = __memp_pgwrite(dbmp, dbmfp, hp, bhp);
-
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
- if (incremented)
- --dbmfp->ref;
- else if (local_open)
- F_SET(dbmfp, MP_FLUSH);
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-
- return (ret);
-}
-
-/*
- * __memp_pgread --
- * Read a page from a file.
- *
- * dbmfp: per-process handle of the file to read from.
- * mutexp: the lock held by the caller (the hash bucket lock, per the
- * comments below); it is released while the buffer lock is held for the
- * read and is reacquired before returning.
- * bhp: buffer header to fill; must not be dirty or locked on entry.
- * can_create: if zero, a short read returns DB_PAGE_NOTFOUND; otherwise
- * the page is created by clearing clear_len bytes (the whole page when
- * clear_len is 0).
- *
- * Returns 0 on success, DB_PAGE_NOTFOUND as described above, or the
- * error from __os_io or the pgin callback. BH_TRASH is set on entry and
- * cleared only on success; BH_LOCKED is always cleared before returning.
- *
- * PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, DB_MUTEX *, BH *, int));
- */
-int
-__memp_pgread(dbmfp, mutexp, bhp, can_create)
- DB_MPOOLFILE *dbmfp;
- DB_MUTEX *mutexp;
- BH *bhp;
- int can_create;
-{
- DB_IO db_io;
- DB_ENV *dbenv;
- DB_MPOOL *dbmp;
- MPOOLFILE *mfp;
- size_t len, nr, pagesize;
- int ret;
-
- dbmp = dbmfp->dbmp;
- dbenv = dbmp->dbenv;
- mfp = dbmfp->mfp;
- pagesize = mfp->stat.st_pagesize;
-
- /* We should never be called with a dirty or a locked buffer. */
- DB_ASSERT(!F_ISSET(bhp, BH_DIRTY | BH_DIRTY_CREATE | BH_LOCKED));
-
- /* Lock the buffer and swap the hash bucket lock for the buffer lock. */
- F_SET(bhp, BH_LOCKED | BH_TRASH);
- MUTEX_LOCK(dbenv, &bhp->mutex);
- MUTEX_UNLOCK(dbenv, mutexp);
-
- /*
- * Temporary files may not yet have been created. We don't create
- * them now, we create them when the pages have to be flushed.
- */
- nr = 0;
- if (F_ISSET(dbmfp->fhp, DB_FH_VALID)) {
- db_io.fhp = dbmfp->fhp;
- db_io.mutexp = dbmfp->mutexp;
- db_io.pagesize = db_io.bytes = pagesize;
- db_io.pgno = bhp->pgno;
- db_io.buf = bhp->buf;
-
- /*
- * The page may not exist; if it doesn't, nr may well be 0,
- * but we expect the underlying OS calls not to return an
- * error code in this case.
- */
- if ((ret = __os_io(dbenv, &db_io, DB_IO_READ, &nr)) != 0)
- goto err;
- }
-
- if (nr < pagesize) {
- /*
- * Don't output error messages for short reads. In particular,
- * DB recovery processing may request pages never written to
- * disk or for which only some part have been written to disk,
- * in which case we won't find the page. The caller must know
- * how to handle the error.
- */
- if (can_create == 0) {
- ret = DB_PAGE_NOTFOUND;
- goto err;
- }
-
- /* Clear any bytes that need to be cleared. */
- len = mfp->clear_len == 0 ? pagesize : mfp->clear_len;
- memset(bhp->buf, 0, len);
-
-#if defined(DIAGNOSTIC) || defined(UMRW)
- /*
- * If we're running in diagnostic mode, corrupt any bytes on
- * the page that are unknown quantities for the caller.
- */
- if (len < pagesize)
- memset(bhp->buf + len, CLEAR_BYTE, pagesize - len);
-#endif
- ++mfp->stat.st_page_create;
- } else
- ++mfp->stat.st_page_in;
-
- /* Call any pgin function. */
- ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1);
-
- /* Unlock the buffer and reacquire the hash bucket lock. */
-err: MUTEX_UNLOCK(dbenv, &bhp->mutex);
- MUTEX_LOCK(dbenv, mutexp);
-
- /*
- * If no errors occurred, the data is now valid, clear the BH_TRASH
- * flag; regardless, clear the lock bit and let other threads proceed.
- */
- F_CLR(bhp, BH_LOCKED);
- if (ret == 0)
- F_CLR(bhp, BH_TRASH);
-
- return (ret);
-}
-
-/*
- * __memp_pgwrite --
- * Write a page to a file.
- *
- * dbmfp may be NULL (__memp_bhwrite passes NULL for files marked
- * MP_DEADFILE); in that case, or when the file is marked MP_DEADFILE,
- * nothing is written and the buffer is simply marked clean.
- *
- * Locking: unless BH_LOCKED is already set on entry, the hash bucket
- * lock (hp->hash_mutex) is traded for the buffer lock here. On every
- * exit path the buffer lock is released and hp->hash_mutex is acquired.
- *
- * Returns 0 on success, or the error from log_flush, the pgout
- * callback, temporary file creation or __os_io. On success the buffer's
- * dirty flags are cleared and hp->hash_page_dirty is decremented; in
- * all cases ref_sync is reset and BH_LOCKED is cleared.
- *
- * NOTE(review): the byte count returned by __os_io in nw is never
- * examined; presumably __os_io reports short writes as errors -- confirm.
- */
-static int
-__memp_pgwrite(dbmp, dbmfp, hp, bhp)
- DB_MPOOL *dbmp;
- DB_MPOOLFILE *dbmfp;
- DB_MPOOL_HASH *hp;
- BH *bhp;
-{
- DB_ENV *dbenv;
- DB_IO db_io;
- DB_LSN lsn;
- MPOOLFILE *mfp;
- size_t nw;
- int callpgin, ret;
-
- dbenv = dbmp->dbenv;
- mfp = dbmfp == NULL ? NULL : dbmfp->mfp;
- callpgin = ret = 0;
-
- /*
- * We should never be called with a clean or trash buffer.
- * The sync code does call us with already locked buffers.
- */
- DB_ASSERT(F_ISSET(bhp, BH_DIRTY));
- DB_ASSERT(!F_ISSET(bhp, BH_TRASH));
-
- /*
- * If we have not already traded the hash bucket lock for the buffer
- * lock, do so now.
- */
- if (!F_ISSET(bhp, BH_LOCKED)) {
- F_SET(bhp, BH_LOCKED);
- MUTEX_LOCK(dbenv, &bhp->mutex);
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- }
-
- /*
- * It's possible that the underlying file doesn't exist, either
- * because of an outright removal or because it was a temporary
- * file that's been closed.
- *
- * !!!
- * Once we pass this point, we know that dbmfp and mfp aren't NULL,
- * and that we have a valid file reference.
- */
- if (mfp == NULL || F_ISSET(mfp, MP_DEADFILE))
- goto file_dead;
-
- /*
- * If the page is in a file for which we have LSN information, we have
- * to ensure the appropriate log records are on disk.
- */
- if (LOGGING_ON(dbenv) && mfp->lsn_off != -1) {
- memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
- if ((ret = dbenv->log_flush(dbenv, &lsn)) != 0)
- goto err;
- }
-
-#ifdef DIAGNOSTIC
- /*
- * Verify write-ahead logging semantics.
- *
- * !!!
- * One special case. There is a single field on the meta-data page,
- * the last-page-number-in-the-file field, for which we do not log
- * changes. If the page was originally created in a database that
- * didn't have logging turned on, we can see a page marked dirty but
- * for which no corresponding log record has been written. However,
- * the only way that a page can be created for which there isn't a
- * previous log record and valid LSN is when the page was created
- * without logging turned on, and so we check for that special-case
- * LSN value.
- */
- if (LOGGING_ON(dbenv) && !IS_NOT_LOGGED_LSN(LSN(bhp->buf))) {
- /*
- * There is a potential race here. If we are in the midst of
- * switching log files, it's possible we could test against the
- * old file and the new offset in the log region's LSN. If we
- * fail the first test, acquire the log mutex and check again.
- */
- DB_LOG *dblp;
- LOG *lp;
-
- dblp = dbenv->lg_handle;
- lp = dblp->reginfo.primary;
- if (!IS_NOT_LOGGED_LSN(LSN(bhp->buf)) &&
- log_compare(&lp->s_lsn, &LSN(bhp->buf)) <= 0) {
- R_LOCK(dbenv, &dblp->reginfo);
- DB_ASSERT(log_compare(&lp->s_lsn, &LSN(bhp->buf)) > 0);
- R_UNLOCK(dbenv, &dblp->reginfo);
- }
- }
-#endif
-
- /*
- * Call any pgout function. We set the callpgin flag so that we flag
- * that the contents of the buffer will need to be passed through pgin
- * before they are reused.
- */
- if (mfp->ftype != 0) {
- callpgin = 1;
- if ((ret = __memp_pg(dbmfp, bhp, 0)) != 0)
- goto err;
- }
-
- /* Temporary files may not yet have been created. */
- if (!F_ISSET(dbmfp->fhp, DB_FH_VALID)) {
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
- ret = F_ISSET(dbmfp->fhp, DB_FH_VALID) ? 0 :
- __db_appname(dbenv, DB_APP_TMP, NULL,
- F_ISSET(dbenv, DB_ENV_DIRECT_DB) ? DB_OSO_DIRECT : 0,
- dbmfp->fhp, NULL);
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
- if (ret != 0) {
- __db_err(dbenv,
- "unable to create temporary backing file");
- goto err;
- }
- }
-
- /* Write the page. */
- db_io.fhp = dbmfp->fhp;
- db_io.mutexp = dbmfp->mutexp;
- db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize;
- db_io.pgno = bhp->pgno;
- db_io.buf = bhp->buf;
- if ((ret = __os_io(dbenv, &db_io, DB_IO_WRITE, &nw)) != 0) {
- __db_err(dbenv, "%s: write failed for page %lu",
- __memp_fn(dbmfp), (u_long)bhp->pgno);
- goto err;
- }
- ++mfp->stat.st_page_out;
-
-err:
-file_dead:
- /*
- * !!!
- * Once we pass this point, dbmfp and mfp may be NULL, we may not have
- * a valid file reference.
- *
- * Unlock the buffer and reacquire the hash lock.
- */
- MUTEX_UNLOCK(dbenv, &bhp->mutex);
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
-
- /*
- * If we rewrote the page, it will need processing by the pgin
- * routine before reuse.
- */
- if (callpgin)
- F_SET(bhp, BH_CALLPGIN);
-
- /*
- * Update the hash bucket statistics, reset the flags.
- * If we were successful, the page is no longer dirty.
- */
- if (ret == 0) {
- DB_ASSERT(hp->hash_page_dirty != 0);
- --hp->hash_page_dirty;
-
- F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
- }
-
- /* Regardless, clear any sync wait-for count and remove our lock. */
- bhp->ref_sync = 0;
- F_CLR(bhp, BH_LOCKED);
-
- return (ret);
-}
-
-/*
- * __memp_pg --
- * Call the pgin/pgout routine.
- *
- * Looks up the DB_MPREG registered for the file's ftype and, if one is
- * found, calls its pgin (is_pgin != 0) or pgout (is_pgin == 0) callback
- * on the buffer's page, passing the file's pgcookie as a DBT (or NULL
- * when the cookie length is 0). A missing registration or a NULL
- * callback is not an error.
- *
- * Returns 0 on success, or the callback's non-zero return value after
- * reporting the failure through __db_err.
- *
- * Locking: dbmp->mutexp protects the walk of dbmp->dbregq and is
- * released before the callback runs. Every "goto err" therefore happens
- * with the mutex already released, so the error path must not unlock it
- * a second time.
- *
- * PUBLIC: int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
- */
-int
-__memp_pg(dbmfp, bhp, is_pgin)
- DB_MPOOLFILE *dbmfp;
- BH *bhp;
- int is_pgin;
-{
- DBT dbt, *dbtp;
- DB_ENV *dbenv;
- DB_MPOOL *dbmp;
- DB_MPREG *mpreg;
- MPOOLFILE *mfp;
- int ftype, ret;
-
- dbmp = dbmfp->dbmp;
- dbenv = dbmp->dbenv;
- mfp = dbmfp->mfp;
-
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
-
- ftype = mfp->ftype;
- for (mpreg = LIST_FIRST(&dbmp->dbregq);
- mpreg != NULL; mpreg = LIST_NEXT(mpreg, q)) {
- if (ftype != mpreg->ftype)
- continue;
- if (mfp->pgcookie_len == 0)
- dbtp = NULL;
- else {
- dbt.size = mfp->pgcookie_len;
- dbt.data = R_ADDR(dbmp->reginfo, mfp->pgcookie_off);
- dbtp = &dbt;
- }
- /* Drop the list mutex before calling into application code. */
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-
- if (is_pgin) {
- if (mpreg->pgin != NULL &&
- (ret = mpreg->pgin(dbenv,
- bhp->pgno, bhp->buf, dbtp)) != 0)
- goto err;
- } else
- if (mpreg->pgout != NULL &&
- (ret = mpreg->pgout(dbenv,
- bhp->pgno, bhp->buf, dbtp)) != 0)
- goto err;
- break;
- }
-
- /* No registration matched: the mutex is still held, release it. */
- if (mpreg == NULL)
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-
- return (0);
-
- /*
- * The mutex was already released before the callback was invoked;
- * unlocking it again here would be a double unlock.
- */
-err: __db_err(dbenv, "%s: %s failed for page %lu",
- __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
- return (ret);
-}
-
-/*
- * __memp_bhfree --
- * Free a bucket header and its referenced data.
- *
- * hp: hash bucket containing bhp; must be locked on entry and is
- * unlocked by this function.
- * bhp: buffer header to remove from the bucket.
- * free_mem: if non-zero, the buffer's memory is returned to the cache
- * region and that cache's st_pages is decremented; if zero, the memory
- * is left allocated (__memp_alloc uses this to reuse the buffer).
- *
- * Also decrements the owning MPOOLFILE's block_cnt and calls
- * __memp_mf_discard when block_cnt and mpf_cnt are both zero.
- * NOTE(review): mfp->mutex is not released here on the discard path;
- * presumably __memp_mf_discard takes care of it -- confirm.
- *
- * PUBLIC: void __memp_bhfree __P((DB_MPOOL *, DB_MPOOL_HASH *, BH *, int));
- */
-void
-__memp_bhfree(dbmp, hp, bhp, free_mem)
- DB_MPOOL *dbmp;
- DB_MPOOL_HASH *hp;
- BH *bhp;
- int free_mem;
-{
- DB_ENV *dbenv;
- MPOOL *c_mp, *mp;
- MPOOLFILE *mfp;
- u_int32_t n_cache;
-
- /*
- * Assumes the hash bucket is locked and the MPOOL is not.
- */
- dbenv = dbmp->dbenv;
- mp = dbmp->reginfo[0].primary;
- n_cache = NCACHE(mp, bhp->mf_offset, bhp->pgno);
-
- /*
- * Delete the buffer header from the hash bucket queue and reset
- * the hash bucket's priority, if necessary.
- */
- SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
- if (bhp->priority == hp->hash_priority)
- hp->hash_priority =
- SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL ?
- 0 : SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
-
- /*
- * Discard the hash bucket's mutex, it's no longer needed, and
- * we don't want to be holding it when acquiring other locks.
- */
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-
- /*
- * Find the underlying MPOOLFILE and decrement its reference count.
- * If this is its last reference, remove it.
- */
- mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
- MUTEX_LOCK(dbenv, &mfp->mutex);
- if (--mfp->block_cnt == 0 && mfp->mpf_cnt == 0)
- __memp_mf_discard(dbmp, mfp);
- else
- MUTEX_UNLOCK(dbenv, &mfp->mutex);
-
- R_LOCK(dbenv, &dbmp->reginfo[n_cache]);
-
- /*
- * Clear the mutex this buffer recorded; requires the region lock
- * be held.
- */
- __db_shlocks_clear(&bhp->mutex, &dbmp->reginfo[n_cache],
- (REGMAINT *)R_ADDR(&dbmp->reginfo[n_cache], mp->maint_off));
-
- /*
- * If we're not reusing the buffer immediately, free the buffer header
- * and data for real.
- */
- if (free_mem) {
- __db_shalloc_free(dbmp->reginfo[n_cache].addr, bhp);
- c_mp = dbmp->reginfo[n_cache].primary;
- c_mp->stat.st_pages--;
- }
- R_UNLOCK(dbenv, &dbmp->reginfo[n_cache]);
-}
-
-/*
- * __memp_upgrade --
- * Upgrade a file descriptor from read-only to read-write.
- *
- * Allocates a new DB_FH, resolves the file's path with __db_appname and
- * opens it with __os_open (passing only DB_OSO_DIRECT or no flags). On
- * success the new handle is installed in dbmfp->fhp, the old handle is
- * closed and freed, and MP_UPGRADE is set on dbmfp.
- *
- * Returns 0 on success and 1 on any failure (not an errno value).
- * MP_UPGRADE_FAIL is set on dbmfp only when the __os_open call fails;
- * allocation and path-resolution failures leave it unset.
- */
-static int
-__memp_upgrade(dbmp, dbmfp, mfp)
- DB_MPOOL *dbmp;
- DB_MPOOLFILE *dbmfp;
- MPOOLFILE *mfp;
-{
- DB_ENV *dbenv;
- DB_FH *fhp, *tfhp;
- int ret;
- char *rpath;
-
- dbenv = dbmp->dbenv;
- fhp = NULL;
- rpath = NULL;
-
- /*
- * Calculate the real name for this file and try to open it read/write.
- * We know we have a valid pathname for the file because it's the only
- * way we could have gotten a file descriptor of any kind.
- */
- if ((ret = __os_calloc(dbenv, 1, sizeof(DB_FH), &fhp)) != 0)
- goto err;
-
- if ((ret = __db_appname(dbenv, DB_APP_DATA,
- R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) != 0)
- goto err;
-
- if (__os_open(dbenv, rpath,
- F_ISSET(mfp, MP_DIRECT) ? DB_OSO_DIRECT : 0, 0, fhp) != 0) {
- F_SET(dbmfp, MP_UPGRADE_FAIL);
- goto err;
- }
-
- /*
- * Swap the descriptors and set the upgrade flag.
- *
- * XXX
- * There is a race here. If another process schedules a read using the
- * existing file descriptor and is swapped out before making the system
- * call, this code could theoretically close the file descriptor out
- * from under it. While it's very unlikely, this code should still be
- * rewritten.
- *
- * After the swap fhp refers to the old handle, which is the one
- * closed here and freed by the common cleanup below.
- */
- tfhp = dbmfp->fhp;
- dbmfp->fhp = fhp;
- fhp = tfhp;
-
- (void)__os_closehandle(dbenv, fhp);
- F_SET(dbmfp, MP_UPGRADE);
-
- ret = 0;
- if (0) {
-err: ret = 1;
- }
- if (fhp != NULL)
- __os_free(dbenv, fhp);
- if (rpath != NULL)
- __os_free(dbenv, rpath);
-
- return (ret);
-}
diff --git a/bdb/mp/mp_fget.c b/bdb/mp/mp_fget.c
deleted file mode 100644
index be0785a2184..00000000000
--- a/bdb/mp/mp_fget.c
+++ /dev/null
@@ -1,654 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_fget.c,v 11.68 2002/08/06 04:58:09 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-#ifdef HAVE_FILESYSTEM_NOTZERO
-static int __memp_fs_notzero
- __P((DB_ENV *, DB_MPOOLFILE *, MPOOLFILE *, db_pgno_t *));
-#endif
-
-/*
- * __memp_fget --
- * Get a page from the file.
- *
- * dbmfp is the DB_MPOOLFILE handle the page is requested through.
- *
- * pgnoaddr holds the page number to fetch. It is overwritten with the
- * file's last page number for DB_MPOOL_LAST, and with the newly assigned
- * page number for DB_MPOOL_NEW.
- *
- * flags is 0, or exactly one of DB_MPOOL_CREATE, DB_MPOOL_LAST or
- * DB_MPOOL_NEW; combinations are rejected.
- *
- * addrp is really a void **. It is cleared to NULL on entry and set to
- * the address of the page on success.
- *
- * Returns 0 on success, DB_PAGE_NOTFOUND if the page is past the end of
- * the file and neither DB_MPOOL_CREATE nor DB_MPOOL_NEW was specified,
- * EINVAL if the buffer's reference count would overflow, or the error
- * returned by the flag check, buffer allocation, page read or pgin step.
- *
- * PUBLIC: int __memp_fget
- * PUBLIC: __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void *));
- */
-int
-__memp_fget(dbmfp, pgnoaddr, flags, addrp)
- DB_MPOOLFILE *dbmfp;
- db_pgno_t *pgnoaddr;
- u_int32_t flags;
- void *addrp;
-{
- enum { FIRST_FOUND, FIRST_MISS, SECOND_FOUND, SECOND_MISS } state;
- BH *alloc_bhp, *bhp;
- DB_ENV *dbenv;
- DB_MPOOL *dbmp;
- DB_MPOOL_HASH *hp;
- MPOOL *c_mp, *mp;
- MPOOLFILE *mfp;
- roff_t mf_offset;
- u_int32_t n_cache, st_hsearch;
- int b_incr, extending, first, ret;
-
- *(void **)addrp = NULL;
-
- dbmp = dbmfp->dbmp;
- dbenv = dbmp->dbenv;
-
- PANIC_CHECK(dbenv);
-
- mp = dbmp->reginfo[0].primary;
- mfp = dbmfp->mfp;
- mf_offset = R_OFFSET(dbmp->reginfo, mfp);
- alloc_bhp = bhp = NULL;
- hp = NULL;
- b_incr = extending = ret = 0;
-
- /*
- * Validate arguments.
- *
- * !!!
- * Don't test for DB_MPOOL_CREATE and DB_MPOOL_NEW flags for readonly
- * files here, and create non-existent pages in readonly files if the
- * flags are set, later. The reason is that the hash access method
- * wants to get empty pages that don't really exist in readonly files.
- * The only alternative is for hash to write the last "bucket" all the
- * time, which we don't want to do because one of our big goals in life
- * is to keep database files small. It's sleazy as hell, but we catch
- * any attempt to actually write the file in memp_fput().
- */
-#define OKFLAGS (DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW)
- if (flags != 0) {
- if ((ret = __db_fchk(dbenv, "memp_fget", flags, OKFLAGS)) != 0)
- return (ret);
-
- switch (flags) {
- case DB_MPOOL_CREATE:
- break;
- case DB_MPOOL_LAST:
- /* Get the last page number in the file. */
- R_LOCK(dbenv, dbmp->reginfo);
- *pgnoaddr = mfp->last_pgno;
- R_UNLOCK(dbenv, dbmp->reginfo);
- break;
- case DB_MPOOL_NEW:
- /*
- * If always creating a page, skip the first search
- * of the hash bucket.
- */
- goto alloc;
- default:
- return (__db_ferr(dbenv, "memp_fget", 1));
- }
- }
-
- /*
- * If mmap'ing the file and the page is not past the end of the file,
- * just return a pointer.
- *
- * The page may be past the end of the file, so check the page number
- * argument against the original length of the file. If we previously
- * returned pages past the original end of the file, last_pgno will
- * have been updated to match the "new" end of the file, and checking
- * against it would return pointers past the end of the mmap'd region.
- *
- * If another process has opened the file for writing since we mmap'd
- * it, we will start playing the game by their rules, i.e. everything
- * goes through the cache. All pages previously returned will be safe,
- * as long as the correct locking protocol was observed.
- *
- * We don't discard the map because we don't know when all of the
- * pages will have been discarded from the process' address space.
- * It would be possible to do so by reference counting the open
- * pages from the mmap, but it's unclear to me that it's worth it.
- */
- if (dbmfp->addr != NULL &&
- F_ISSET(mfp, MP_CAN_MMAP) && *pgnoaddr <= mfp->orig_last_pgno) {
- *(void **)addrp =
- R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
- ++mfp->stat.st_map;
- return (0);
- }
-
-hb_search:
- /*
- * Determine the cache and hash bucket where this page lives and get
- * local pointers to them. Reset on each pass through this code, the
- * page number can change.
- */
- n_cache = NCACHE(mp, mf_offset, *pgnoaddr);
- c_mp = dbmp->reginfo[n_cache].primary;
- hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
- hp = &hp[NBUCKET(c_mp, mf_offset, *pgnoaddr)];
-
- /* Search the hash chain for the page. */
-retry: st_hsearch = 0;
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
- for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
- ++st_hsearch;
- if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset)
- continue;
-
- /*
- * Increment the reference count. We may discard the hash
- * bucket lock as we evaluate and/or read the buffer, so we
- * need to ensure it doesn't move and its contents remain
- * unchanged.
- */
- if (bhp->ref == UINT16_T_MAX) {
- __db_err(dbenv,
- "%s: page %lu: reference count overflow",
- __memp_fn(dbmfp), (u_long)bhp->pgno);
- ret = EINVAL;
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- goto err;
- }
- ++bhp->ref;
- b_incr = 1;
-
- /*
- * BH_LOCKED --
- * I/O is in progress or sync is waiting on the buffer to write
- * it. Because we've incremented the buffer reference count,
- * we know the buffer can't move. Unlock the bucket lock, wait
- * for the buffer to become available, reacquire the bucket.
- */
- for (first = 1; F_ISSET(bhp, BH_LOCKED) &&
- !F_ISSET(dbenv, DB_ENV_NOLOCKING); first = 0) {
- /*
- * If someone is trying to sync this buffer and the
- * buffer is hot, they may never get in. Give up
- * and try again.
- */
- if (!first && bhp->ref_sync != 0) {
- --bhp->ref;
- b_incr = 0;
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- __os_yield(dbenv, 1);
- goto retry;
- }
-
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- /*
- * Explicitly yield the processor if not the first pass
- * through this loop -- if we don't, we might run to the
- * end of our CPU quantum as we will simply be swapping
- * between the two locks.
- */
- if (!first)
- __os_yield(dbenv, 1);
-
- MUTEX_LOCK(dbenv, &bhp->mutex);
- /* Wait for I/O to finish... */
- MUTEX_UNLOCK(dbenv, &bhp->mutex);
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
- }
-
- ++mfp->stat.st_cache_hit;
- break;
- }
-
- /*
- * Update the hash bucket search statistics -- do now because our next
- * search may be for a different bucket.
- */
- ++c_mp->stat.st_hash_searches;
- if (st_hsearch > c_mp->stat.st_hash_longest)
- c_mp->stat.st_hash_longest = st_hsearch;
- c_mp->stat.st_hash_examined += st_hsearch;
-
- /*
- * There are 4 possible paths to this location:
- *
- * FIRST_MISS:
- * Didn't find the page in the hash bucket on our first pass:
- * bhp == NULL, alloc_bhp == NULL
- *
- * FIRST_FOUND:
- * Found the page in the hash bucket on our first pass:
- * bhp != NULL, alloc_bhp == NULL
- *
- * SECOND_FOUND:
- * Didn't find the page in the hash bucket on the first pass,
- * allocated space, and found the page in the hash bucket on
- * our second pass:
- * bhp != NULL, alloc_bhp != NULL
- *
- * SECOND_MISS:
- * Didn't find the page in the hash bucket on the first pass,
- * allocated space, and didn't find the page in the hash bucket
- * on our second pass:
- * bhp == NULL, alloc_bhp != NULL
- */
- state = bhp == NULL ?
- (alloc_bhp == NULL ? FIRST_MISS : SECOND_MISS) :
- (alloc_bhp == NULL ? FIRST_FOUND : SECOND_FOUND);
- switch (state) {
- case FIRST_FOUND:
- /* We found the buffer in our first check -- we're done. */
- break;
- case FIRST_MISS:
- /*
- * We didn't find the buffer in our first check. Figure out
- * if the page exists, and allocate structures so we can add
- * the page to the buffer pool.
- */
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-
-alloc: /*
- * If DB_MPOOL_NEW is set, we have to allocate a page number.
- * If neither DB_MPOOL_NEW nor DB_MPOOL_CREATE is set, then
- * it's an error to try and get a page past the end of file.
- */
- COMPQUIET(n_cache, 0);
-
- extending = ret = 0;
- R_LOCK(dbenv, dbmp->reginfo);
- switch (flags) {
- case DB_MPOOL_NEW:
- extending = 1;
- *pgnoaddr = mfp->last_pgno + 1;
- break;
- case DB_MPOOL_CREATE:
- extending = *pgnoaddr > mfp->last_pgno;
- break;
- default:
- ret = *pgnoaddr > mfp->last_pgno ? DB_PAGE_NOTFOUND : 0;
- break;
- }
- R_UNLOCK(dbenv, dbmp->reginfo);
- if (ret != 0)
- goto err;
-
- /*
- * !!!
- * In the DB_MPOOL_NEW code path, mf_offset and n_cache have
- * not yet been initialized.
- */
- mf_offset = R_OFFSET(dbmp->reginfo, mfp);
- n_cache = NCACHE(mp, mf_offset, *pgnoaddr);
-
- /* Allocate a new buffer header and data space. */
- if ((ret = __memp_alloc(dbmp,
- &dbmp->reginfo[n_cache], mfp, 0, NULL, &alloc_bhp)) != 0)
- goto err;
-#ifdef DIAGNOSTIC
- if ((db_alignp_t)alloc_bhp->buf & (sizeof(size_t) - 1)) {
- __db_err(dbenv,
- "Error: buffer data is NOT size_t aligned");
- ret = EINVAL;
- goto err;
- }
-#endif
- /*
- * If we are extending the file, we'll need the region lock
- * again.
- */
- if (extending)
- R_LOCK(dbenv, dbmp->reginfo);
-
- /*
- * DB_MPOOL_NEW does not guarantee you a page unreferenced by
- * any other thread of control. (That guarantee is interesting
- * for DB_MPOOL_NEW, unlike DB_MPOOL_CREATE, because the caller
- * did not specify the page number, and so, may reasonably not
- * have any way to lock the page outside of mpool.) Regardless,
- * if we allocate the page, and some other thread of control
- * requests the page by number, we will not detect that and the
- * thread of control that allocated using DB_MPOOL_NEW may not
- * have a chance to initialize the page. (Note: we *could*
- * detect this case if we set a flag in the buffer header which
- * guaranteed that no gets of the page would succeed until the
- * reference count went to 0, that is, until the creating page
- * put the page.) What we do guarantee is that if two threads
- * of control are both doing DB_MPOOL_NEW calls, they won't
- * collide, that is, they won't both get the same page.
- *
- * There's a possibility that another thread allocated the page
- * we were planning to allocate while we were off doing buffer
- * allocation. We can detect that by making sure the page number
- * we were going to use is still available. If it's not, then
- * we check to see if the next available page number hashes to
- * the same mpool region as the old one -- if it does, we can
- * continue, otherwise, we have to start over.
- */
- if (flags == DB_MPOOL_NEW && *pgnoaddr != mfp->last_pgno + 1) {
- *pgnoaddr = mfp->last_pgno + 1;
- if (n_cache != NCACHE(mp, mf_offset, *pgnoaddr)) {
- __db_shalloc_free(
- dbmp->reginfo[n_cache].addr, alloc_bhp);
- /*
- * flags == DB_MPOOL_NEW, so extending is set
- * and we're holding the region locked.
- */
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- alloc_bhp = NULL;
- goto alloc;
- }
- }
-
- /*
- * We released the region lock, so another thread might have
- * extended the file. Update the last_pgno and initialize
- * the file, as necessary, if we extended the file.
- */
- if (extending) {
-#ifdef HAVE_FILESYSTEM_NOTZERO
- if (*pgnoaddr > mfp->last_pgno &&
- __os_fs_notzero() &&
- F_ISSET(dbmfp->fhp, DB_FH_VALID))
- ret = __memp_fs_notzero(
- dbenv, dbmfp, mfp, pgnoaddr);
- else
- ret = 0;
-#endif
- if (ret == 0 && *pgnoaddr > mfp->last_pgno)
- mfp->last_pgno = *pgnoaddr;
-
- R_UNLOCK(dbenv, dbmp->reginfo);
- if (ret != 0)
- goto err;
- }
- goto hb_search;
- case SECOND_FOUND:
- /*
- * We allocated buffer space for the requested page, but then
- * found the page in the buffer cache on our second check.
- * That's OK -- we can use the page we found in the pool,
- * unless DB_MPOOL_NEW is set.
- *
- * Free the allocated memory, we no longer need it. Since we
- * can't acquire the region lock while holding the hash bucket
- * lock, we have to release the hash bucket and re-acquire it.
- * That's OK, because we have the buffer pinned down.
- */
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- R_LOCK(dbenv, &dbmp->reginfo[n_cache]);
- __db_shalloc_free(dbmp->reginfo[n_cache].addr, alloc_bhp);
- alloc_bhp = NULL;
- R_UNLOCK(dbenv, &dbmp->reginfo[n_cache]);
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
-
- /*
- * We can't use the page we found in the pool if DB_MPOOL_NEW
- * was set. (For details, see the above comment beginning
- * "DB_MPOOL_NEW does not guarantee you a page unreferenced by
- * any other thread of control".) If DB_MPOOL_NEW is set, we
- * release our pin on this particular buffer, and try to get
- * another one.
- */
- if (flags == DB_MPOOL_NEW) {
- --bhp->ref;
- b_incr = 0;
- /*
- * We re-acquired the hash bucket lock just above. The
- * alloc code takes the region lock and hb_search locks
- * a bucket again, so release this one before jumping,
- * exactly as the FIRST_MISS path does.
- */
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- goto alloc;
- }
- break;
- case SECOND_MISS:
- /*
- * We allocated buffer space for the requested page, and found
- * the page still missing on our second pass through the buffer
- * cache. Instantiate the page.
- */
- bhp = alloc_bhp;
- alloc_bhp = NULL;
-
- /*
- * Initialize all the BH and hash bucket fields so we can call
- * __memp_bhfree if an error occurs.
- *
- * Append the buffer to the tail of the bucket list and update
- * the hash bucket's priority.
- */
- b_incr = 1;
-
- memset(bhp, 0, sizeof(BH));
- bhp->ref = 1;
- bhp->priority = UINT32_T_MAX;
- bhp->pgno = *pgnoaddr;
- bhp->mf_offset = mf_offset;
- SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
- hp->hash_priority =
- SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
-
- /* If we extended the file, make sure the page is never lost. */
- if (extending) {
- ++hp->hash_page_dirty;
- F_SET(bhp, BH_DIRTY | BH_DIRTY_CREATE);
- }
-
- /*
- * If we created the page, zero it out. If we didn't create
- * the page, read from the backing file.
- *
- * !!!
- * DB_MPOOL_NEW doesn't call the pgin function.
- *
- * If DB_MPOOL_CREATE is used, then the application's pgin
- * function has to be able to handle pages of 0's -- if it
- * uses DB_MPOOL_NEW, it can detect all of its page creates,
- * and not bother.
- *
- * If we're running in diagnostic mode, smash any bytes on the
- * page that are unknown quantities for the caller.
- *
- * Otherwise, read the page into memory, optionally creating it
- * if DB_MPOOL_CREATE is set.
- */
- if (extending) {
- if (mfp->clear_len == 0)
- memset(bhp->buf, 0, mfp->stat.st_pagesize);
- else {
- memset(bhp->buf, 0, mfp->clear_len);
-#if defined(DIAGNOSTIC) || defined(UMRW)
- memset(bhp->buf + mfp->clear_len, CLEAR_BYTE,
- mfp->stat.st_pagesize - mfp->clear_len);
-#endif
- }
-
- if (flags == DB_MPOOL_CREATE && mfp->ftype != 0)
- F_SET(bhp, BH_CALLPGIN);
-
- ++mfp->stat.st_page_create;
- } else {
- F_SET(bhp, BH_TRASH);
- ++mfp->stat.st_cache_miss;
- }
-
- /* Increment buffer count referenced by MPOOLFILE. */
- MUTEX_LOCK(dbenv, &mfp->mutex);
- ++mfp->block_cnt;
- MUTEX_UNLOCK(dbenv, &mfp->mutex);
-
- /*
- * Initialize the mutex. This is the last initialization step,
- * because it's the only one that can fail, and everything else
- * must be set up or we can't jump to the err label because it
- * will call __memp_bhfree.
- */
- if ((ret = __db_mutex_setup(dbenv,
- &dbmp->reginfo[n_cache], &bhp->mutex, 0)) != 0)
- goto err;
- }
-
- DB_ASSERT(bhp->ref != 0);
-
- /*
- * If we're the only reference, update buffer and bucket priorities.
- * We may be about to release the hash bucket lock, and everything
- * should be correct, first. (We've already done this if we created
- * the buffer, so there is no need to do it again.)
- */
- if (state != SECOND_MISS && bhp->ref == 1) {
- bhp->priority = UINT32_T_MAX;
- SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
- SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
- hp->hash_priority =
- SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
- }
-
- /*
- * BH_TRASH --
- * The buffer we found may need to be filled from the disk.
- *
- * It's possible for the read function to fail, which means we fail as
- * well. Note, the __memp_pgread() function discards and reacquires
- * the hash lock, so the buffer must be pinned down so that it cannot
- * move and its contents are unchanged. Discard the buffer on failure
- * unless another thread is waiting on our I/O to complete. It's OK to
- * leave the buffer around, as the waiting thread will see the BH_TRASH
- * flag set, and will also attempt to discard it. If there's a waiter,
- * we need to decrement our reference count.
- */
- if (F_ISSET(bhp, BH_TRASH) &&
- (ret = __memp_pgread(dbmfp,
- &hp->hash_mutex, bhp, LF_ISSET(DB_MPOOL_CREATE) ? 1 : 0)) != 0)
- goto err;
-
- /*
- * BH_CALLPGIN --
- * The buffer was processed for being written to disk, and now has
- * to be re-converted for use.
- */
- if (F_ISSET(bhp, BH_CALLPGIN)) {
- if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
- goto err;
- F_CLR(bhp, BH_CALLPGIN);
- }
-
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-
-#ifdef DIAGNOSTIC
- /* Update the file's pinned reference count. */
- R_LOCK(dbenv, dbmp->reginfo);
- ++dbmfp->pinref;
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- /*
- * We want to switch threads as often as possible, and at awkward
- * times. Yield every time we get a new page to ensure contention.
- */
- if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
- __os_yield(dbenv, 1);
-#endif
-
- *(void **)addrp = bhp->buf;
- return (0);
-
-err: /*
- * Discard our reference. If we're the only reference, discard the
- * buffer entirely. If we held a reference to a buffer, we are
- * also still holding the hash bucket mutex.
- */
- if (b_incr) {
- if (bhp->ref == 1)
- (void)__memp_bhfree(dbmp, hp, bhp, 1);
- else {
- --bhp->ref;
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- }
- }
-
- /* If alloc_bhp is set, free the memory. */
- if (alloc_bhp != NULL)
- __db_shalloc_free(dbmp->reginfo[n_cache].addr, alloc_bhp);
-
- return (ret);
-}
-
-#ifdef HAVE_FILESYSTEM_NOTZERO
-/*
- * __memp_fs_notzero --
- *	Initialize the underlying allocated pages in the file.
- *
- *	Writes a zero-filled page to every page number after mfp->last_pgno
- *	up to and including *pgnoaddr, syncing the file handle after the
- *	leading pages (when there are any) and again after the final page.
- *
- *	Returns 0 on success, otherwise the error from the allocation, write
- *	or sync that failed; write and sync failures are also reported
- *	through __db_err.
- */
-static int
-__memp_fs_notzero(dbenv, dbmfp, mfp, pgnoaddr)
-	DB_ENV *dbenv;
-	DB_MPOOLFILE *dbmfp;
-	MPOOLFILE *mfp;
-	db_pgno_t *pgnoaddr;
-{
-	DB_IO io;
-	u_int32_t n, npages;
-	size_t nw;
-	int ret;
-	u_int8_t *zero_page;
-	char *what;
-
-	/*
-	 * Pages allocated by writing pages past end-of-file are not zeroed,
-	 * on some systems.  Recovery could theoretically be fooled by a page
-	 * showing up that contained garbage.  In order to avoid this, we
-	 * have to write the pages out to disk, and flush them.  The reason
-	 * for the flush is because if we don't sync, the allocation of another
-	 * page subsequent to this one might reach the disk first, and if we
-	 * crashed at the right moment, leave us with this page as the one
-	 * allocated by writing a page past it in the file.
-	 *
-	 * Hash is the only access method that allocates groups of pages.  We
-	 * know that it will use the existence of the last page in a group to
-	 * signify that the entire group is OK; so, write all the pages but
-	 * the last one in the group, flush them to disk, and then write the
-	 * last one to disk and flush it.
-	 */
-	ret = __os_calloc(dbenv, 1, mfp->stat.st_pagesize, &zero_page);
-	if (ret != 0)
-		return (ret);
-
-	io.fhp = dbmfp->fhp;
-	io.mutexp = dbmfp->mutexp;
-	io.pagesize = io.bytes = mfp->stat.st_pagesize;
-	io.buf = zero_page;
-
-	npages = *pgnoaddr - mfp->last_pgno;
-
-	/* Step 1: every page of the group except the last one. */
-	what = "write";
-	for (n = 1; n < npages; ++n) {
-		io.pgno = mfp->last_pgno + n;
-		if ((ret = __os_io(dbenv, &io, DB_IO_WRITE, &nw)) != 0)
-			goto failed;
-	}
-
-	/* Step 2: flush those pages, if step 1 wrote any. */
-	if (npages > 1) {
-		what = "sync";
-		if ((ret = __os_fsync(dbenv, dbmfp->fhp)) != 0)
-			goto failed;
-	}
-
-	/* Step 3: the last page of the group, followed by its own flush. */
-	what = "write";
-	io.pgno = mfp->last_pgno + npages;
-	if ((ret = __os_io(dbenv, &io, DB_IO_WRITE, &nw)) != 0)
-		goto failed;
-
-	what = "sync";
-	if ((ret = __os_fsync(dbenv, dbmfp->fhp)) != 0)
-		goto failed;
-
-	goto done;
-
-failed:	__db_err(dbenv, "%s: %s failed for page %lu",
-	    __memp_fn(dbmfp), what, (u_long)io.pgno);
-
-done:	__os_free(dbenv, zero_page);
-	return (ret);
-}
-#endif
diff --git a/bdb/mp/mp_fopen.c b/bdb/mp/mp_fopen.c
deleted file mode 100644
index 8fdefb0f5e9..00000000000
--- a/bdb/mp/mp_fopen.c
+++ /dev/null
@@ -1,1018 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_fopen.c,v 11.90 2002/08/26 15:22:01 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-static int __memp_fclose __P((DB_MPOOLFILE *, u_int32_t));
-static int __memp_fopen __P((DB_MPOOLFILE *,
- const char *, u_int32_t, int, size_t));
-static void __memp_get_fileid __P((DB_MPOOLFILE *, u_int8_t *));
-static void __memp_last_pgno __P((DB_MPOOLFILE *, db_pgno_t *));
-static void __memp_refcnt __P((DB_MPOOLFILE *, db_pgno_t *));
-static int __memp_set_clear_len __P((DB_MPOOLFILE *, u_int32_t));
-static int __memp_set_fileid __P((DB_MPOOLFILE *, u_int8_t *));
-static int __memp_set_ftype __P((DB_MPOOLFILE *, int));
-static int __memp_set_lsn_offset __P((DB_MPOOLFILE *, int32_t));
-static int __memp_set_pgcookie __P((DB_MPOOLFILE *, DBT *));
-static int __memp_set_priority __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY));
-static void __memp_set_unlink __P((DB_MPOOLFILE *, int));
-
-/* Initialization methods cannot be called after open is called. */
-#define MPF_ILLEGAL_AFTER_OPEN(dbmfp, name) \
- if (F_ISSET(dbmfp, MP_OPEN_CALLED)) \
- return (__db_mi_open((dbmfp)->dbmp->dbenv, name, 1));
-
-/*
- * __memp_fcreate --
- *	Create a DB_MPOOLFILE handle.
- *
- *	Allocates the per-process handle and its DB_FH, sets up a mutex when
- *	the environment is threaded, installs the method table, and stores
- *	the handle through retp.  flags must be 0.
- *
- *	Returns 0 on success.  On failure nothing is stored through retp and
- *	anything allocated here is released.
- *
- * PUBLIC: int __memp_fcreate __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t));
- */
-int
-__memp_fcreate(dbenv, retp, flags)
-	DB_ENV *dbenv;
-	DB_MPOOLFILE **retp;
-	u_int32_t flags;
-{
-	DB_MPOOL *pool;
-	DB_MPOOLFILE *mpf;
-	int ret;
-
-	PANIC_CHECK(dbenv);
-	ENV_REQUIRES_CONFIG(dbenv,
-	    dbenv->mp_handle, "memp_fcreate", DB_INIT_MPOOL);
-
-	pool = dbenv->mp_handle;
-
-	/* No flags are currently accepted. */
-	ret = __db_fchk(dbenv, "memp_fcreate", flags, 0);
-	if (ret != 0)
-		return (ret);
-
-	/* The per-process structure itself. */
-	ret = __os_calloc(dbenv, 1, sizeof(DB_MPOOLFILE), &mpf);
-	if (ret != 0)
-		return (ret);
-
-	/* Its file handle, then (threaded environments only) its mutex. */
-	ret = __os_calloc(dbenv, 1, sizeof(DB_FH), &mpf->fhp);
-	if (ret == 0 && F_ISSET(dbenv, DB_ENV_THREAD))
-		ret = __db_mutex_setup(dbenv, pool->reginfo, &mpf->mutexp,
-		    MUTEX_ALLOC | MUTEX_THREAD);
-	if (ret != 0) {
-		/* Undo the allocations made so far. */
-		if (mpf != NULL) {
-			if (mpf->fhp != NULL)
-				(void)__os_free(dbenv, mpf->fhp);
-			(void)__os_free(dbenv, mpf);
-		}
-		return (ret);
-	}
-
-	/* Scalar state. */
-	mpf->ref = 1;
-	mpf->lsn_offset = -1;
-	mpf->dbmp = pool;
-	mpf->mfp = INVALID_ROFF;
-
-	/* Configuration methods. */
-	mpf->set_clear_len = __memp_set_clear_len;
-	mpf->set_fileid = __memp_set_fileid;
-	mpf->set_ftype = __memp_set_ftype;
-	mpf->set_lsn_offset = __memp_set_lsn_offset;
-	mpf->set_pgcookie = __memp_set_pgcookie;
-	mpf->set_priority = __memp_set_priority;
-	mpf->set_unlink = __memp_set_unlink;
-
-	/* Query methods. */
-	mpf->get_fileid = __memp_get_fileid;
-	mpf->last_pgno = __memp_last_pgno;
-	mpf->refcnt = __memp_refcnt;
-
-	/* File and page operations. */
-	mpf->open = __memp_fopen;
-	mpf->close = __memp_fclose;
-	mpf->get = __memp_fget;
-	mpf->put = __memp_fput;
-	mpf->set = __memp_fset;
-	mpf->sync = __memp_fsync;
-
-	*retp = mpf;
-	return (0);
-}
-
-/*
- * __memp_set_clear_len --
- *	Set the clear length.
- *
- *	Records clear_len on the handle.  Returns 0, or the result of
- *	__db_mi_open() if the handle is already marked MP_OPEN_CALLED.
- */
-static int
-__memp_set_clear_len(dbmfp, clear_len)
-	DB_MPOOLFILE *dbmfp;
-	u_int32_t clear_len;
-{
-	/* Initialization methods cannot be called after open is called. */
-	if (F_ISSET(dbmfp, MP_OPEN_CALLED))
-		return (__db_mi_open(dbmfp->dbmp->dbenv, "set_clear_len", 1));
-
-	dbmfp->clear_len = clear_len;
-
-	return (0);
-}
-
-/*
- * __memp_set_fileid --
- *	Set the file ID.
- *
- *	Returns 0, or the result of __db_mi_open() if the handle is already
- *	marked MP_OPEN_CALLED.
- *
- *	XXX
- *	Only the caller's pointer is stored -- the ID bytes are not copied --
- *	so the caller's buffer has to stay valid for as long as the handle
- *	may read it.  This is dangerous.
- */
-static int
-__memp_set_fileid(dbmfp, fileid)
-	DB_MPOOLFILE *dbmfp;
-	u_int8_t *fileid;
-{
-	/* Initialization methods cannot be called after open is called. */
-	if (F_ISSET(dbmfp, MP_OPEN_CALLED))
-		return (__db_mi_open(dbmfp->dbmp->dbenv, "set_fileid", 1));
-
-	dbmfp->fileid = fileid;
-
-	return (0);
-}
-
-/*
- * __memp_set_ftype --
- *	Set the file type (as registered).
- *
- *	Records ftype on the handle.  Returns 0, or the result of
- *	__db_mi_open() if the handle is already marked MP_OPEN_CALLED.
- */
-static int
-__memp_set_ftype(dbmfp, ftype)
-	DB_MPOOLFILE *dbmfp;
-	int ftype;
-{
-	/* Initialization methods cannot be called after open is called. */
-	if (F_ISSET(dbmfp, MP_OPEN_CALLED))
-		return (__db_mi_open(dbmfp->dbmp->dbenv, "set_ftype", 1));
-
-	dbmfp->ftype = ftype;
-
-	return (0);
-}
-
-/*
- * __memp_set_lsn_offset --
- *	Set the page's LSN offset.
- *
- *	Records lsn_offset on the handle.  Returns 0, or the result of
- *	__db_mi_open() if the handle is already marked MP_OPEN_CALLED.
- */
-static int
-__memp_set_lsn_offset(dbmfp, lsn_offset)
-	DB_MPOOLFILE *dbmfp;
-	int32_t lsn_offset;
-{
-	/* Initialization methods cannot be called after open is called. */
-	if (F_ISSET(dbmfp, MP_OPEN_CALLED))
-		return (__db_mi_open(dbmfp->dbmp->dbenv, "set_lsn_offset", 1));
-
-	dbmfp->lsn_offset = lsn_offset;
-
-	return (0);
-}
-
-/*
- * __memp_set_pgcookie --
- *	Set the pgin/pgout cookie.
- *
- *	Stores the caller's DBT pointer on the handle; the DBT itself is not
- *	copied here.  Returns 0, or the result of __db_mi_open() if the
- *	handle is already marked MP_OPEN_CALLED.
- */
-static int
-__memp_set_pgcookie(dbmfp, pgcookie)
-	DB_MPOOLFILE *dbmfp;
-	DBT *pgcookie;
-{
-	/* Initialization methods cannot be called after open is called. */
-	if (F_ISSET(dbmfp, MP_OPEN_CALLED))
-		return (__db_mi_open(dbmfp->dbmp->dbenv, "set_pgcookie", 1));
-
-	dbmfp->pgcookie = pgcookie;
-
-	return (0);
-}
-
-/*
- * __memp_set_priority --
- *	Set the cache priority for pages from this file.
- *
- *	The priority is written into the MPOOLFILE that dbmfp->mfp points at.
- *	__memp_fcreate initializes dbmfp->mfp to INVALID_ROFF, so a handle
- *	that has not yet been given a real MPOOLFILE is rejected here instead
- *	of having that placeholder dereferenced.
- *
- *	Returns 0 on success, or EINVAL (after reporting through __db_err)
- *	if the handle has no MPOOLFILE or the priority value is not one of
- *	the DB_PRIORITY_* constants.
- */
-static int
-__memp_set_priority(dbmfp, priority)
-	DB_MPOOLFILE *dbmfp;
-	DB_CACHE_PRIORITY priority;
-{
-	if (dbmfp->mfp == INVALID_ROFF) {
-		__db_err(dbmfp->dbmp->dbenv,
-		    "set_priority: method not permitted before open");
-		return (EINVAL);
-	}
-
-	switch (priority) {
-	case DB_PRIORITY_VERY_LOW:
-		dbmfp->mfp->priority = MPOOL_PRI_VERY_LOW;
-		break;
-	case DB_PRIORITY_LOW:
-		dbmfp->mfp->priority = MPOOL_PRI_LOW;
-		break;
-	case DB_PRIORITY_DEFAULT:
-		dbmfp->mfp->priority = MPOOL_PRI_DEFAULT;
-		break;
-	case DB_PRIORITY_HIGH:
-		dbmfp->mfp->priority = MPOOL_PRI_HIGH;
-		break;
-	case DB_PRIORITY_VERY_HIGH:
-		dbmfp->mfp->priority = MPOOL_PRI_VERY_HIGH;
-		break;
-	default:
-		__db_err(dbmfp->dbmp->dbenv,
-		    "Unknown priority value: %d", priority);
-		return (EINVAL);
-	}
-
-	return (0);
-}
-
-/*
- * __memp_fopen --
- *	Open a backing file for the memory pool.
- *
- *	This is the argument-checking front end: once the flags, page size,
- *	clear length and read-only/temporary combination have been validated
- *	it hands off to __memp_fopen_int with no pre-selected MPOOLFILE.
- *
- *	path may be NULL, which requests a temporary file.
- *
- *	Returns EINVAL (after reporting through __db_err) for a bad page
- *	size, clear length or read-only temporary file; otherwise the result
- *	of the flag check or of __memp_fopen_int.
- */
-static int
-__memp_fopen(dbmfp, path, flags, mode, pagesize)
-	DB_MPOOLFILE *dbmfp;
-	const char *path;
-	u_int32_t flags;
-	int mode;
-	size_t pagesize;
-{
-	DB_ENV *dbenv;
-	const char *complaint;
-	int ret;
-
-	dbenv = dbmfp->dbmp->dbenv;
-
-	PANIC_CHECK(dbenv);
-
-	/* Validate arguments. */
-	ret = __db_fchk(dbenv, "memp_fopen", flags,
-	    DB_CREATE | DB_DIRECT | DB_EXTENT |
-	    DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE);
-	if (ret != 0)
-		return (ret);
-
-	/*
-	 * Require a non-zero, power-of-two page size that is at least as
-	 * large as the clear length, and refuse read-only temporary files.
-	 * The checks are made in this order and the first failure wins.
-	 */
-	if (pagesize == 0 || !POWER_OF_TWO(pagesize))
-		complaint = "memp_fopen: page sizes must be a power-of-2";
-	else if (dbmfp->clear_len > pagesize)
-		complaint = "memp_fopen: clear length larger than page size";
-	else if (LF_ISSET(DB_RDONLY) && path == NULL)
-		complaint = "memp_fopen: temporary files can't be readonly";
-	else
-		complaint = NULL;
-
-	if (complaint != NULL) {
-		__db_err(dbenv, "%s", complaint);
-		return (EINVAL);
-	}
-
-	return (__memp_fopen_int(dbmfp, NULL, path, flags, mode, pagesize));
-}
-
-/*
- * __memp_fopen_int --
- * Open a backing file for the memory pool; internal version.
- *
- * PUBLIC: int __memp_fopen_int __P((DB_MPOOLFILE *,
- * PUBLIC: MPOOLFILE *, const char *, u_int32_t, int, size_t));
- */
-int
-__memp_fopen_int(dbmfp, mfp, path, flags, mode, pagesize)
- DB_MPOOLFILE *dbmfp;
- MPOOLFILE *mfp;
- const char *path;
- u_int32_t flags;
- int mode;
- size_t pagesize;
-{
- DB_ENV *dbenv;
- DB_MPOOL *dbmp;
- MPOOL *mp;
- db_pgno_t last_pgno;
- size_t maxmap;
- u_int32_t mbytes, bytes, oflags;
- int mfp_alloc, ret;
- u_int8_t idbuf[DB_FILE_ID_LEN];
- char *rpath;
- void *p;
-
- dbmp = dbmfp->dbmp;
- dbenv = dbmp->dbenv;
- mp = dbmp->reginfo[0].primary;
- mfp_alloc = ret = 0;
- rpath = NULL;
-
- /*
- * Set the page size so os_open can decide whether to turn buffering
- * off if the DB_DIRECT_DB flag is set.
- */
- dbmfp->fhp->pagesize = (u_int32_t)pagesize;
-
- /*
- * If it's a temporary file, delay the open until we actually need
- * to write the file, and we know we can't join any existing files.
- */
- if (path == NULL)
- goto alloc;
-
- /*
- * Get the real name for this file and open it. If it's a Queue extent
- * file, it may not exist, and that's OK.
- */
- oflags = 0;
- if (LF_ISSET(DB_CREATE))
- oflags |= DB_OSO_CREATE;
- if (LF_ISSET(DB_DIRECT))
- oflags |= DB_OSO_DIRECT;
- if (LF_ISSET(DB_RDONLY)) {
- F_SET(dbmfp, MP_READONLY);
- oflags |= DB_OSO_RDONLY;
- }
- if ((ret =
- __db_appname(dbenv, DB_APP_DATA, path, 0, NULL, &rpath)) != 0)
- goto err;
- if ((ret = __os_open(dbenv, rpath, oflags, mode, dbmfp->fhp)) != 0) {
- if (!LF_ISSET(DB_EXTENT))
- __db_err(dbenv, "%s: %s", rpath, db_strerror(ret));
- goto err;
- }
-
- /*
- * Figure out the file's size.
- *
- * !!!
- * We can't use off_t's here, or in any code in the mainline library
- * for that matter. (We have to use them in the os stubs, of course,
- * as there are system calls that take them as arguments.) The reason
- * is some customers build in environments where an off_t is 32-bits,
- * but still run where offsets are 64-bits, and they pay us a lot of
- * money.
- */
- if ((ret = __os_ioinfo(
- dbenv, rpath, dbmfp->fhp, &mbytes, &bytes, NULL)) != 0) {
- __db_err(dbenv, "%s: %s", rpath, db_strerror(ret));
- goto err;
- }
-
- /*
- * Get the file id if we weren't given one. Generated file id's
- * don't use timestamps, otherwise there'd be no chance of any
- * other process joining the party.
- */
- if (dbmfp->fileid == NULL) {
- if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
- goto err;
- dbmfp->fileid = idbuf;
- }
-
- /*
- * If our caller knows what mfp we're using, increment the ref count,
- * no need to search.
- *
- * We don't need to acquire a lock other than the mfp itself, because
- * we know there's another reference and it's not going away.
- */
- if (mfp != NULL) {
- MUTEX_LOCK(dbenv, &mfp->mutex);
- ++mfp->mpf_cnt;
- MUTEX_UNLOCK(dbenv, &mfp->mutex);
- goto check_map;
- }
-
- /*
- * If not creating a temporary file, walk the list of MPOOLFILE's,
- * looking for a matching file. Files backed by temporary files
- * or previously removed files can't match.
- *
- * DB_TRUNCATE support.
- *
- * The fileID is a filesystem unique number (e.g., a UNIX dev/inode
- * pair) plus a timestamp. If files are removed and created in less
- * than a second, the fileID can be repeated. The problem with
- * repetition happens when the file that previously had the fileID
- * value still has pages in the pool, since we don't want to use them
- * to satisfy requests for the new file.
- *
- * Because the DB_TRUNCATE flag reuses the dev/inode pair, repeated
- * opens with that flag set guarantees matching fileIDs when the
- * machine can open a file and then re-open with truncate within a
- * second. For this reason, we pass that flag down, and, if we find
- * a matching entry, we ensure that it's never found again, and we
- * create a new entry for the current request.
- */
- R_LOCK(dbenv, dbmp->reginfo);
- for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
- mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
- /* Skip dead files and temporary files. */
- if (F_ISSET(mfp, MP_DEADFILE | MP_TEMP))
- continue;
-
- /* Skip non-matching files. */
- if (memcmp(dbmfp->fileid, R_ADDR(dbmp->reginfo,
- mfp->fileid_off), DB_FILE_ID_LEN) != 0)
- continue;
-
- /*
- * If the file is being truncated, remove it from the system
- * and create a new entry.
- *
- * !!!
- * We should be able to set mfp to NULL and break out of the
- * loop, but I like the idea of checking all the entries.
- */
- if (LF_ISSET(DB_TRUNCATE)) {
- MUTEX_LOCK(dbenv, &mfp->mutex);
- MPOOLFILE_IGNORE(mfp);
- MUTEX_UNLOCK(dbenv, &mfp->mutex);
- continue;
- }
-
- /*
- * Some things about a file cannot be changed: the clear length,
- * page size, or lSN location.
- *
- * The file type can change if the application's pre- and post-
- * processing needs change. For example, an application that
- * created a hash subdatabase in a database that was previously
- * all btree.
- *
- * XXX
- * We do not check to see if the pgcookie information changed,
- * or update it if it is, this might be a bug.
- */
- if (dbmfp->clear_len != mfp->clear_len ||
- pagesize != mfp->stat.st_pagesize ||
- dbmfp->lsn_offset != mfp->lsn_off) {
- __db_err(dbenv,
- "%s: clear length, page size or LSN location changed",
- path);
- R_UNLOCK(dbenv, dbmp->reginfo);
- ret = EINVAL;
- goto err;
- }
-
- if (dbmfp->ftype != 0)
- mfp->ftype = dbmfp->ftype;
-
- MUTEX_LOCK(dbenv, &mfp->mutex);
- ++mfp->mpf_cnt;
- MUTEX_UNLOCK(dbenv, &mfp->mutex);
- break;
- }
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- if (mfp != NULL)
- goto check_map;
-
-alloc: /* Allocate and initialize a new MPOOLFILE. */
- if ((ret = __memp_alloc(
- dbmp, dbmp->reginfo, NULL, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
- goto err;
- mfp_alloc = 1;
- memset(mfp, 0, sizeof(MPOOLFILE));
- mfp->mpf_cnt = 1;
- mfp->ftype = dbmfp->ftype;
- mfp->stat.st_pagesize = pagesize;
- mfp->lsn_off = dbmfp->lsn_offset;
- mfp->clear_len = dbmfp->clear_len;
-
- if (LF_ISSET(DB_DIRECT))
- F_SET(mfp, MP_DIRECT);
- if (LF_ISSET(DB_EXTENT))
- F_SET(mfp, MP_EXTENT);
- F_SET(mfp, MP_CAN_MMAP);
-
- if (path == NULL)
- F_SET(mfp, MP_TEMP);
- else {
- /*
- * Don't permit files that aren't a multiple of the pagesize,
- * and find the number of the last page in the file, all the
- * time being careful not to overflow 32 bits.
- *
- * During verify or recovery, we might have to cope with a
- * truncated file; if the file size is not a multiple of the
- * page size, round down to a page, we'll take care of the
- * partial page outside the mpool system.
- */
- if (bytes % pagesize != 0) {
- if (LF_ISSET(DB_ODDFILESIZE))
- bytes -= (u_int32_t)(bytes % pagesize);
- else {
- __db_err(dbenv,
- "%s: file size not a multiple of the pagesize", rpath);
- ret = EINVAL;
- goto err;
- }
- }
-
- /*
- * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a
- * page get, we have to increment the last page in the file.
- * Figure it out and save it away.
- *
- * Note correction: page numbers are zero-based, not 1-based.
- */
- last_pgno = (db_pgno_t)(mbytes * (MEGABYTE / pagesize));
- last_pgno += (db_pgno_t)(bytes / pagesize);
- if (last_pgno != 0)
- --last_pgno;
- mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
-
- /* Copy the file path into shared memory. */
- if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
- NULL, strlen(path) + 1, &mfp->path_off, &p)) != 0)
- goto err;
- memcpy(p, path, strlen(path) + 1);
-
- /* Copy the file identification string into shared memory. */
- if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
- NULL, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
- goto err;
- memcpy(p, dbmfp->fileid, DB_FILE_ID_LEN);
- }
-
- /* Copy the page cookie into shared memory. */
- if (dbmfp->pgcookie == NULL || dbmfp->pgcookie->size == 0) {
- mfp->pgcookie_len = 0;
- mfp->pgcookie_off = 0;
- } else {
- if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
- NULL, dbmfp->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
- goto err;
- memcpy(p, dbmfp->pgcookie->data, dbmfp->pgcookie->size);
- mfp->pgcookie_len = dbmfp->pgcookie->size;
- }
-
- /*
- * Prepend the MPOOLFILE to the list of MPOOLFILE's.
- */
- R_LOCK(dbenv, dbmp->reginfo);
- ret = __db_mutex_setup(dbenv, dbmp->reginfo, &mfp->mutex,
- MUTEX_NO_RLOCK);
- if (ret == 0)
- SH_TAILQ_INSERT_HEAD(&mp->mpfq, mfp, q, __mpoolfile);
- R_UNLOCK(dbenv, dbmp->reginfo);
- if (ret != 0)
- goto err;
-
-check_map:
- /*
- * If a file:
- * + isn't temporary
- * + is read-only
- * + doesn't require any pgin/pgout support
- * + the DB_NOMMAP flag wasn't set (in either the file open or
- * the environment in which it was opened)
- * + and is less than mp_mmapsize bytes in size
- *
- * we can mmap it instead of reading/writing buffers. Don't do error
- * checking based on the mmap call failure. We want to do normal I/O
- * on the file if the reason we failed was because the file was on an
- * NFS mounted partition, and we can fail in buffer I/O just as easily
- * as here.
- *
- * We'd like to test to see if the file is too big to mmap. Since we
- * don't know what size or type off_t's or size_t's are, or the largest
- * unsigned integral type is, or what random insanity the local C
- * compiler will perpetrate, doing the comparison in a portable way is
- * flatly impossible. Hope that mmap fails if the file is too large.
- */
-#define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 MB. */
- if (F_ISSET(mfp, MP_CAN_MMAP)) {
- if (path == NULL)
- F_CLR(mfp, MP_CAN_MMAP);
- if (!F_ISSET(dbmfp, MP_READONLY))
- F_CLR(mfp, MP_CAN_MMAP);
- if (dbmfp->ftype != 0)
- F_CLR(mfp, MP_CAN_MMAP);
- if (LF_ISSET(DB_NOMMAP) || F_ISSET(dbenv, DB_ENV_NOMMAP))
- F_CLR(mfp, MP_CAN_MMAP);
- maxmap = dbenv->mp_mmapsize == 0 ?
- DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
- if (mbytes > maxmap / MEGABYTE ||
- (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE))
- F_CLR(mfp, MP_CAN_MMAP);
-
- dbmfp->addr = NULL;
- if (F_ISSET(mfp, MP_CAN_MMAP)) {
- dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
- if (__os_mapfile(dbenv, rpath,
- dbmfp->fhp, dbmfp->len, 1, &dbmfp->addr) != 0) {
- dbmfp->addr = NULL;
- F_CLR(mfp, MP_CAN_MMAP);
- }
- }
- }
-
- dbmfp->mfp = mfp;
-
- F_SET(dbmfp, MP_OPEN_CALLED);
-
- /* Add the file to the process' list of DB_MPOOLFILEs. */
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
- TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-
- if (0) {
-err: if (F_ISSET(dbmfp->fhp, DB_FH_VALID))
- (void)__os_closehandle(dbenv, dbmfp->fhp);
-
- if (mfp_alloc) {
- R_LOCK(dbenv, dbmp->reginfo);
- if (mfp->path_off != 0)
- __db_shalloc_free(dbmp->reginfo[0].addr,
- R_ADDR(dbmp->reginfo, mfp->path_off));
- if (mfp->fileid_off != 0)
- __db_shalloc_free(dbmp->reginfo[0].addr,
- R_ADDR(dbmp->reginfo, mfp->fileid_off));
- __db_shalloc_free(dbmp->reginfo[0].addr, mfp);
- R_UNLOCK(dbenv, dbmp->reginfo);
- }
-
- }
- if (rpath != NULL)
- __os_free(dbenv, rpath);
- return (ret);
-}
-
-/*
- * __memp_get_fileid --
- *	Copy the file's ID (DB_FILE_ID_LEN bytes) into the caller's buffer.
- *
- * XXX
- * Undocumented interface: DB private.
- */
-static void
-__memp_get_fileid(dbmfp, fidp)
-	DB_MPOOLFILE *dbmfp;
-	u_int8_t *fidp;
-{
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-
-	dbmp = dbmfp->dbmp;
-	mfp = dbmfp->mfp;
-
-	/*
-	 * No lock is taken: the caller is using the handle, so it had better
-	 * not be going away.
-	 *
-	 * !!!
-	 * The ID is read from the shared region rather than from the
-	 * DB_MPOOLFILE structure, because the DB_MPOOLFILE reference is
-	 * possibly short lived and isn't to be trusted.
-	 */
-	memcpy(fidp, R_ADDR(dbmp->reginfo, mfp->fileid_off), DB_FILE_ID_LEN);
-}
-
-/*
- * __memp_last_pgno --
- *	Store the file's last page number through pgnoaddr.
- *
- * XXX
- * Undocumented interface: DB private.
- */
-static void
-__memp_last_pgno(dbmfp, pgnoaddr)
-	DB_MPOOLFILE *dbmfp;
-	db_pgno_t *pgnoaddr;
-{
-	DB_ENV *dbenv;
-	DB_MPOOL *dbmp;
-	db_pgno_t last;
-
-	dbmp = dbmfp->dbmp;
-	dbenv = dbmp->dbenv;
-
-	/* Sample the shared value while holding the region lock. */
-	R_LOCK(dbenv, dbmp->reginfo);
-	last = dbmfp->mfp->last_pgno;
-	R_UNLOCK(dbenv, dbmp->reginfo);
-
-	*pgnoaddr = last;
-}
-
-/*
- * __memp_refcnt --
- *	Store the underlying MPOOLFILE's reference count through cntp.
- *
- * XXX
- * Undocumented interface: DB private.
- */
-static void
-__memp_refcnt(dbmfp, cntp)
-	DB_MPOOLFILE *dbmfp;
-	db_pgno_t *cntp;
-{
-	DB_ENV *dbenv;
-	MPOOLFILE *mfp;
-
-	dbenv = dbmfp->dbmp->dbenv;
-	mfp = dbmfp->mfp;
-
-	/* The count is read under the MPOOLFILE's own mutex. */
-	MUTEX_LOCK(dbenv, &mfp->mutex);
-	*cntp = mfp->mpf_cnt;
-	MUTEX_UNLOCK(dbenv, &mfp->mutex);
-}
-
-/*
- * __memp_set_unlink --
- *	Set (set != 0) or clear (set == 0) the unlink-on-last-close flag,
- *	MP_UNLINK, on the underlying MPOOLFILE.
- *
- * XXX
- * Undocumented interface: DB private.
- */
-static void
-__memp_set_unlink(dbmpf, set)
-	DB_MPOOLFILE *dbmpf;
-	int set;
-{
-	DB_ENV *dbenv;
-	MPOOLFILE *mfp;
-
-	dbenv = dbmpf->dbmp->dbenv;
-	mfp = dbmpf->mfp;
-
-	/* The flag is changed under the MPOOLFILE's mutex. */
-	MUTEX_LOCK(dbenv, &mfp->mutex);
-	if (set == 0)
-		F_CLR(mfp, MP_UNLINK);
-	else
-		F_SET(mfp, MP_UNLINK);
-	MUTEX_UNLOCK(dbenv, &mfp->mutex);
-}
-
-/*
- * __memp_fclose --
- *	Close a backing file for the memory pool.
- *
- *	The flags are validated, but the handle is closed even when the
- *	validation fails; the validation error takes precedence over any
- *	error from the close itself.
- */
-static int
-__memp_fclose(dbmfp, flags)
-	DB_MPOOLFILE *dbmfp;
-	u_int32_t flags;
-{
-	DB_ENV *dbenv;
-	int chk_ret, close_ret;
-
-	dbenv = dbmfp->dbmp->dbenv;
-
-	PANIC_CHECK(dbenv);
-
-	/*
-	 * XXX
-	 * DB_MPOOL_DISCARD: Undocumented flag: DB private.
-	 */
-	chk_ret =
-	    __db_fchk(dbenv, "DB_MPOOLFILE->close", flags, DB_MPOOL_DISCARD);
-
-	close_ret = __memp_fclose_int(dbmfp, flags);
-
-	return (chk_ret != 0 ? chk_ret : close_ret);
-}
-
-/*
- * __memp_fclose_int --
- *	Internal version of __memp_fclose.
- *
- *	Waits until this thread holds the only reference to the DB_MPOOLFILE,
- *	removes it from the per-process list, unmaps and closes the backing
- *	file, drops the reference on the shared MPOOLFILE (discarding it when
- *	it is no longer needed) and frees the handle.  The handle is always
- *	freed; the return value is 0 or the first error encountered.
- *
- * PUBLIC: int __memp_fclose_int __P((DB_MPOOLFILE *, u_int32_t));
- */
-int
-__memp_fclose_int(dbmfp, flags)
-	DB_MPOOLFILE *dbmfp;
-	u_int32_t flags;
-{
-	DB_ENV *dbenv;
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-	char *rpath;
-	int deleted, ret, t_ret;
-
-	dbmp = dbmfp->dbmp;
-	dbenv = dbmp->dbenv;
-	ret = 0;
-
-	/*
-	 * We have to reference count DB_MPOOLFILE structures as other threads
-	 * in the process may be using them.  Here's the problem:
-	 *
-	 *	Thread A opens a database.
-	 *	Thread B uses thread A's DB_MPOOLFILE to write a buffer
-	 *	    in order to free up memory in the mpool cache.
-	 *	Thread A closes the database while thread B is using the
-	 *	    DB_MPOOLFILE structure.
-	 *
-	 * By opening all databases before creating any threads, and closing
-	 * the databases after all the threads have exited, applications get
-	 * better performance and avoid the problem path entirely.
-	 *
-	 * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer is a
-	 * short-term lock, even in worst case, since we better be the only
-	 * thread of control using the DB_MPOOLFILE structure to read pages
-	 * *into* the cache.  Wait until we're the only reference holder and
-	 * remove the DB_MPOOLFILE structure from the list, so nobody else can
-	 * find it.  We do this, rather than have the last reference holder
-	 * (whoever that might be) discard the DB_MPOOLFILE structure, because
-	 * we'd rather write error messages to the application in the close
-	 * routine, not in the checkpoint/sync routine.
-	 *
-	 * !!!
-	 * It's possible the DB_MPOOLFILE was never added to the DB_MPOOLFILE
-	 * file list, check the MP_OPEN_CALLED flag to be sure.
-	 */
-	for (deleted = 0;;) {
-		MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
-		if (dbmfp->ref == 1) {
-			if (F_ISSET(dbmfp, MP_OPEN_CALLED))
-				TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
-			deleted = 1;
-		}
-		MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-
-		if (deleted)
-			break;
-		__os_sleep(dbenv, 1, 0);
-	}
-
-	/* Complain if pinned blocks never returned. */
-	if (dbmfp->pinref != 0) {
-		__db_err(dbenv, "%s: close: %lu blocks left pinned",
-		    __memp_fn(dbmfp), (u_long)dbmfp->pinref);
-		ret = __db_panic(dbenv, DB_RUNRECOVERY);
-	}
-
-	/*
-	 * Discard any mmap information.  The result goes through t_ret so
-	 * that a successful unmap cannot erase an error recorded above.
-	 */
-	if (dbmfp->addr != NULL &&
-	    (t_ret = __os_unmapfile(dbenv, dbmfp->addr, dbmfp->len)) != 0) {
-		__db_err(dbenv, "%s: %s", __memp_fn(dbmfp), db_strerror(t_ret));
-		if (ret == 0)
-			ret = t_ret;
-	}
-
-	/* Close the file; temporary files may not yet have been created. */
-	if (F_ISSET(dbmfp->fhp, DB_FH_VALID) &&
-	    (t_ret = __os_closehandle(dbenv, dbmfp->fhp)) != 0) {
-		__db_err(dbenv, "%s: %s", __memp_fn(dbmfp), db_strerror(t_ret));
-		if (ret == 0)
-			ret = t_ret;
-	}
-
-	/* Discard the thread mutex. */
-	if (dbmfp->mutexp != NULL)
-		__db_mutex_free(dbenv, dbmp->reginfo, dbmfp->mutexp);
-
-	/*
-	 * Discard our reference on the underlying MPOOLFILE, and close
-	 * it if it's no longer useful to anyone.  It's possible the open of
-	 * the file never happened or wasn't successful, in which case, mfp
-	 * will be NULL.
-	 */
-	if ((mfp = dbmfp->mfp) == NULL)
-		goto done;
-
-	/*
-	 * If it's a temp file, all outstanding references belong to unflushed
-	 * buffers.  (A temp file can only be referenced by one DB_MPOOLFILE).
-	 * We don't care about preserving any of those buffers, so mark the
-	 * MPOOLFILE as dead so that even the dirty ones just get discarded
-	 * when we try to flush them.
-	 */
-	deleted = 0;
-	MUTEX_LOCK(dbenv, &mfp->mutex);
-	if (--mfp->mpf_cnt == 0 || LF_ISSET(DB_MPOOL_DISCARD)) {
-		if (LF_ISSET(DB_MPOOL_DISCARD) ||
-		    F_ISSET(mfp, MP_TEMP | MP_UNLINK))
-			MPOOLFILE_IGNORE(mfp);
-		if (F_ISSET(mfp, MP_UNLINK)) {
-			if ((t_ret = __db_appname(dbmp->dbenv,
-			    DB_APP_DATA, R_ADDR(dbmp->reginfo,
-			    mfp->path_off), 0, NULL, &rpath)) != 0 && ret == 0)
-				ret = t_ret;
-			if (t_ret == 0) {
-				/*
-				 * The assignment is parenthesized on its own
-				 * so t_ret holds the error code rather than
-				 * the result of the comparison.
-				 */
-				if ((t_ret = __os_unlink(
-				    dbmp->dbenv, rpath)) != 0 && ret == 0)
-					ret = t_ret;
-				__os_free(dbenv, rpath);
-			}
-		}
-		if (mfp->block_cnt == 0) {
-			/* __memp_mf_discard releases the MPOOLFILE mutex. */
-			if ((t_ret =
-			    __memp_mf_discard(dbmp, mfp)) != 0 && ret == 0)
-				ret = t_ret;
-			deleted = 1;
-		}
-	}
-	if (deleted == 0)
-		MUTEX_UNLOCK(dbenv, &mfp->mutex);
-
-	/* Discard the DB_MPOOLFILE structure. */
-done:	__os_free(dbenv, dbmfp->fhp);
-	__os_free(dbenv, dbmfp);
-
-	return (ret);
-}
-
-/*
- * __memp_mf_discard --
- * Discard an MPOOLFILE.
- *
- * The caller must hold mfp's mutex; it is released here before the region
- * lock is taken. The file is fsync'd (unless it is already marked
- * MP_DEADFILE), the MPOOLFILE is unlinked from the region's file list, its
- * statistics are folded into the region totals, and its shared memory is
- * freed. mfp must not be referenced after this call.
- *
- * Returns 0, or the error from resolving, opening or syncing the file; the
- * MPOOLFILE is discarded in either case.
- *
- * PUBLIC: int __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *));
- */
-int
-__memp_mf_discard(dbmp, mfp)
- DB_MPOOL *dbmp;
- MPOOLFILE *mfp;
-{
- DB_ENV *dbenv;
- DB_FH fh;
- DB_MPOOL_STAT *sp;
- MPOOL *mp;
- char *rpath;
- int ret;
-
- dbenv = dbmp->dbenv;
- mp = dbmp->reginfo[0].primary;
- ret = 0;
-
- /*
- * Expects caller to be holding the MPOOLFILE mutex.
- *
- * When discarding a file, we have to flush writes from it to disk.
- * The scenario is that dirty buffers from this file need to be
- * flushed to satisfy a future checkpoint, but when the checkpoint
- * calls mpool sync, the sync code won't know anything about them.
- */
- if (!F_ISSET(mfp, MP_DEADFILE) &&
- (ret = __db_appname(dbenv, DB_APP_DATA,
- R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) == 0) {
- /* Open a private handle just long enough to sync the file. */
- if ((ret = __os_open(dbenv, rpath, 0, 0, &fh)) == 0) {
- ret = __os_fsync(dbenv, &fh);
- (void)__os_closehandle(dbenv, &fh);
- }
- __os_free(dbenv, rpath);
- }
-
- /*
- * We have to release the MPOOLFILE lock before acquiring the region
- * lock so that we don't deadlock. Make sure nobody ever looks at
- * this structure again.
- */
- MPOOLFILE_IGNORE(mfp);
-
- /* Discard the mutex we're holding. */
- MUTEX_UNLOCK(dbenv, &mfp->mutex);
-
- /* Delete from the list of MPOOLFILEs. */
- R_LOCK(dbenv, dbmp->reginfo);
- SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
-
- /* Copy the statistics into the region. */
- sp = &mp->stat;
- sp->st_cache_hit += mfp->stat.st_cache_hit;
- sp->st_cache_miss += mfp->stat.st_cache_miss;
- sp->st_map += mfp->stat.st_map;
- sp->st_page_create += mfp->stat.st_page_create;
- sp->st_page_in += mfp->stat.st_page_in;
- sp->st_page_out += mfp->stat.st_page_out;
-
- /* Clear the mutex this MPOOLFILE recorded. */
- __db_shlocks_clear(&mfp->mutex, dbmp->reginfo,
- (REGMAINT *)R_ADDR(dbmp->reginfo, mp->maint_off));
-
- /*
- * Free the space. An offset of 0 means the corresponding piece
- * (path, file ID, page cookie) was never allocated.
- */
- if (mfp->path_off != 0)
- __db_shalloc_free(dbmp->reginfo[0].addr,
- R_ADDR(dbmp->reginfo, mfp->path_off));
- if (mfp->fileid_off != 0)
- __db_shalloc_free(dbmp->reginfo[0].addr,
- R_ADDR(dbmp->reginfo, mfp->fileid_off));
- if (mfp->pgcookie_off != 0)
- __db_shalloc_free(dbmp->reginfo[0].addr,
- R_ADDR(dbmp->reginfo, mfp->pgcookie_off));
- __db_shalloc_free(dbmp->reginfo[0].addr, mfp);
-
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- return (ret);
-}
-
-/*
- * __memp_fn --
- *	Return a printable name for the file behind a DB_MPOOLFILE handle,
- *	for use in error messages.
- *
- * PUBLIC: char * __memp_fn __P((DB_MPOOLFILE *));
- */
-char *
-__memp_fn(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-
-	dbmp = dbmfp->dbmp;
-	mfp = dbmfp->mfp;
-
-	return (__memp_fns(dbmp, mfp));
-}
-
-/*
- * __memp_fns --
- *	Return a printable name for an MPOOLFILE, for use in error messages:
- *	the path stored in the region, or "temporary" when no path was stored
- *	(path_off is 0).
- *
- * PUBLIC: char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
- *
- */
-char *
-__memp_fns(dbmp, mfp)
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-{
-	return (mfp->path_off == 0 ?
-	    (char *)"temporary" :
-	    (char *)R_ADDR(dbmp->reginfo, mfp->path_off));
-}
diff --git a/bdb/mp/mp_fput.c b/bdb/mp/mp_fput.c
deleted file mode 100644
index 271e44a4ef8..00000000000
--- a/bdb/mp/mp_fput.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_fput.c,v 11.36 2002/08/09 19:04:11 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-/*
- * __memp_fput --
- *	Mpool file put function.
- *
- *	Returns a page previously obtained from the pool.  flags may be 0 or
- *	a combination of DB_MPOOL_CLEAN, DB_MPOOL_DIRTY and DB_MPOOL_DISCARD
- *	(CLEAN and DIRTY are mutually exclusive).  When the last reference
- *	is dropped, the buffer's priority is recomputed and the buffer is
- *	moved to its sorted position in the hash bucket.
- *
- *	Returns 0 on success, EACCES if DB_MPOOL_DIRTY is set for a page of
- *	a read-only file, EINVAL if the page was not pinned, or the error
- *	from flag validation.
- *
- * PUBLIC: int __memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t));
- */
-int
-__memp_fput(dbmfp, pgaddr, flags)
-	DB_MPOOLFILE *dbmfp;
-	void *pgaddr;
-	u_int32_t flags;
-{
-	BH *argbhp, *bhp, *prev;
-	DB_ENV *dbenv;
-	DB_MPOOL *dbmp;
-	DB_MPOOL_HASH *hp;
-	MPOOL *c_mp;
-	u_int32_t n_cache;
-	int adjust, ret;
-
-	dbmp = dbmfp->dbmp;
-	dbenv = dbmp->dbenv;
-
-	PANIC_CHECK(dbenv);
-
-	/* Validate arguments. */
-	if (flags) {
-		if ((ret = __db_fchk(dbenv, "memp_fput", flags,
-		    DB_MPOOL_CLEAN | DB_MPOOL_DIRTY | DB_MPOOL_DISCARD)) != 0)
-			return (ret);
-		if ((ret = __db_fcchk(dbenv, "memp_fput",
-		    flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
-			return (ret);
-
-		if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
-			__db_err(dbenv,
-			    "%s: dirty flag set for readonly file page",
-			    __memp_fn(dbmfp));
-			return (EACCES);
-		}
-	}
-
-	/*
-	 * If we're mapping the file, there's nothing to do.  Because we can
-	 * stop mapping the file at any time, we have to check on each buffer
-	 * to see if the address we gave the application was part of the map
-	 * region.
-	 *
-	 * NOTE(review): the upper bound is inclusive, so an address exactly
-	 * one past the end of the mapping is also treated as mapped --
-	 * confirm whether "<" was intended.
-	 */
-	if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
-	    (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
-		return (0);
-
-#ifdef DIAGNOSTIC
-	/*
-	 * Decrement the per-file pinned buffer count (mapped pages aren't
-	 * counted).
-	 */
-	R_LOCK(dbenv, dbmp->reginfo);
-	if (dbmfp->pinref == 0) {
-		ret = EINVAL;
-		__db_err(dbenv,
-		    "%s: more pages returned than retrieved", __memp_fn(dbmfp));
-	} else {
-		ret = 0;
-		--dbmfp->pinref;
-	}
-	R_UNLOCK(dbenv, dbmp->reginfo);
-	if (ret != 0)
-		return (ret);
-#endif
-
-	/* Convert a page address to a buffer header and hash bucket. */
-	bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
-	n_cache = NCACHE(dbmp->reginfo[0].primary, bhp->mf_offset, bhp->pgno);
-	c_mp = dbmp->reginfo[n_cache].primary;
-	hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
-	hp = &hp[NBUCKET(c_mp, bhp->mf_offset, bhp->pgno)];
-
-	MUTEX_LOCK(dbenv, &hp->hash_mutex);
-
-	/* Set/clear the page bits. */
-	if (LF_ISSET(DB_MPOOL_CLEAN) &&
-	    F_ISSET(bhp, BH_DIRTY) && !F_ISSET(bhp, BH_DIRTY_CREATE)) {
-		DB_ASSERT(hp->hash_page_dirty != 0);
-		--hp->hash_page_dirty;
-		F_CLR(bhp, BH_DIRTY);
-	}
-	if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
-		++hp->hash_page_dirty;
-		F_SET(bhp, BH_DIRTY);
-	}
-	if (LF_ISSET(DB_MPOOL_DISCARD))
-		F_SET(bhp, BH_DISCARD);
-
-	/*
-	 * Check for a reference count going to zero.  This can happen if the
-	 * application returns a page twice.
-	 */
-	if (bhp->ref == 0) {
-		__db_err(dbenv, "%s: page %lu: unpinned page returned",
-		    __memp_fn(dbmfp), (u_long)bhp->pgno);
-		MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-		return (EINVAL);
-	}
-
-	/*
-	 * If more than one reference to the page or a reference other than a
-	 * thread waiting to flush the buffer to disk, we're done.  Ignore the
-	 * discard flags (for now) and leave the buffer's priority alone.
-	 */
-	if (--bhp->ref > 1 || (bhp->ref == 1 && !F_ISSET(bhp, BH_LOCKED))) {
-		MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-		return (0);
-	}
-
-	/* Update priority values. */
-	if (F_ISSET(bhp, BH_DISCARD) ||
-	    dbmfp->mfp->priority == MPOOL_PRI_VERY_LOW)
-		bhp->priority = 0;
-	else {
-		/*
-		 * We don't lock the LRU counter or the stat.st_pages field, if
-		 * we get garbage (which won't happen on a 32-bit machine), it
-		 * only means a buffer has the wrong priority.
-		 */
-		bhp->priority = c_mp->lru_count;
-
-		adjust = 0;
-		if (dbmfp->mfp->priority != 0)
-			adjust =
-			    (int)c_mp->stat.st_pages / dbmfp->mfp->priority;
-		if (F_ISSET(bhp, BH_DIRTY))
-			adjust += c_mp->stat.st_pages / MPOOL_PRI_DIRTY;
-
-		/*
-		 * Apply the adjustment only when it cannot wrap: a positive
-		 * adjustment must fit in the headroom below UINT32_T_MAX, a
-		 * negative one must be smaller than the current priority.
-		 */
-		if (adjust > 0) {
-			if (UINT32_T_MAX - bhp->priority >= (u_int32_t)adjust)
-				bhp->priority += adjust;
-		} else if (adjust < 0)
-			if (bhp->priority > (u_int32_t)-adjust)
-				bhp->priority += adjust;
-	}
-
-	/*
-	 * Buffers on hash buckets are sorted by priority -- move the buffer
-	 * to the correct position in the list.
-	 */
-	argbhp = bhp;
-	SH_TAILQ_REMOVE(&hp->hash_bucket, argbhp, hq, __bh);
-
-	prev = NULL;
-	for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
-	    bhp != NULL; prev = bhp, bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
-		if (bhp->priority > argbhp->priority)
-			break;
-	if (prev == NULL)
-		SH_TAILQ_INSERT_HEAD(&hp->hash_bucket, argbhp, hq, __bh);
-	else
-		SH_TAILQ_INSERT_AFTER(&hp->hash_bucket, prev, argbhp, hq, __bh);
-
-	/* Reset the hash bucket's priority. */
-	hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
-
-#ifdef DIAGNOSTIC
-	__memp_check_order(hp);
-#endif
-
-	/*
-	 * The sync code has a separate counter for buffers on which it waits.
-	 * It reads that value without holding a lock so we update it as the
-	 * last thing we do.  Once that value goes to 0, we won't see another
-	 * reference to that buffer being returned to the cache until the sync
-	 * code has finished, so we're safe as long as we don't let the value
-	 * go to 0 before we finish with the buffer.
-	 */
-	if (F_ISSET(argbhp, BH_LOCKED) && argbhp->ref_sync != 0)
-		--argbhp->ref_sync;
-
-	MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-
-	return (0);
-}
diff --git a/bdb/mp/mp_fset.c b/bdb/mp/mp_fset.c
deleted file mode 100644
index 65cd6286ac9..00000000000
--- a/bdb/mp/mp_fset.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_fset.c,v 11.25 2002/05/03 15:21:17 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-/*
- * __memp_fset --
- *	Mpool page set-flag routine.
- *
- *	Applies DB_MPOOL_CLEAN, DB_MPOOL_DIRTY and/or DB_MPOOL_DISCARD to a
- *	page without returning it to the pool.  At least one flag is
- *	required, and CLEAN and DIRTY are mutually exclusive.
- *
- *	Returns 0 on success, EACCES if DB_MPOOL_DIRTY is set for a page of
- *	a read-only file, or the error from flag validation.
- *
- * PUBLIC: int __memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t));
- */
-int
-__memp_fset(dbmfp, pgaddr, flags)
-	DB_MPOOLFILE *dbmfp;
-	void *pgaddr;
-	u_int32_t flags;
-{
-	BH *bhp;
-	DB_ENV *dbenv;
-	DB_MPOOL *dbmp;
-	DB_MPOOL_HASH *bucket, *htab;
-	MPOOL *cache;
-	u_int32_t cache_idx;
-	int ret;
-
-	dbmp = dbmfp->dbmp;
-	dbenv = dbmp->dbenv;
-
-	PANIC_CHECK(dbenv);
-
-	/* Validate arguments: some flag must be specified. */
-	if (flags == 0)
-		return (__db_ferr(dbenv, "memp_fset", 1));
-
-	ret = __db_fchk(dbenv, "memp_fset", flags,
-	    DB_MPOOL_CLEAN | DB_MPOOL_DIRTY | DB_MPOOL_DISCARD);
-	if (ret != 0)
-		return (ret);
-	ret = __db_fcchk(dbenv, "memp_fset",
-	    flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY);
-	if (ret != 0)
-		return (ret);
-
-	if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
-		__db_err(dbenv, "%s: dirty flag set for readonly file page",
-		    __memp_fn(dbmfp));
-		return (EACCES);
-	}
-
-	/* Convert the page address to a buffer header and hash bucket. */
-	bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
-	cache_idx =
-	    NCACHE(dbmp->reginfo[0].primary, bhp->mf_offset, bhp->pgno);
-	cache = dbmp->reginfo[cache_idx].primary;
-	htab = R_ADDR(&dbmp->reginfo[cache_idx], cache->htab);
-	bucket = &htab[NBUCKET(cache, bhp->mf_offset, bhp->pgno)];
-
-	MUTEX_LOCK(dbenv, &bucket->hash_mutex);
-
-	/* Set/clear the page bits, keeping the bucket's dirty count in step. */
-	if (LF_ISSET(DB_MPOOL_CLEAN)) {
-		if (F_ISSET(bhp, BH_DIRTY) && !F_ISSET(bhp, BH_DIRTY_CREATE)) {
-			DB_ASSERT(bucket->hash_page_dirty != 0);
-			--bucket->hash_page_dirty;
-			F_CLR(bhp, BH_DIRTY);
-		}
-	}
-	if (LF_ISSET(DB_MPOOL_DIRTY)) {
-		if (!F_ISSET(bhp, BH_DIRTY)) {
-			++bucket->hash_page_dirty;
-			F_SET(bhp, BH_DIRTY);
-		}
-	}
-	if (LF_ISSET(DB_MPOOL_DISCARD))
-		F_SET(bhp, BH_DISCARD);
-
-	MUTEX_UNLOCK(dbenv, &bucket->hash_mutex);
-	return (0);
-}
diff --git a/bdb/mp/mp_method.c b/bdb/mp/mp_method.c
deleted file mode 100644
index 38f0a645f16..00000000000
--- a/bdb/mp/mp_method.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_method.c,v 11.29 2002/03/27 04:32:27 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#ifdef HAVE_RPC
-#include <rpc/rpc.h>
-#endif
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-#ifdef HAVE_RPC
-#include "dbinc_auto/db_server.h"
-#include "dbinc_auto/rpc_client_ext.h"
-#endif
-
-static int __memp_set_cachesize __P((DB_ENV *, u_int32_t, u_int32_t, int));
-static int __memp_set_mp_mmapsize __P((DB_ENV *, size_t));
-
-/*
- * __memp_dbenv_create --
- *	Mpool specific creation of the DB_ENV structure: set the default
- *	cache size and cache count, and install the mpool method pointers
- *	(the RPC client versions when DB_ENV_RPCCLIENT is set).
- *
- * PUBLIC: void __memp_dbenv_create __P((DB_ENV *));
- */
-void
-__memp_dbenv_create(dbenv)
-	DB_ENV *dbenv;
-{
-	/*
-	 * !!!
-	 * Our caller has not yet had the opportunity to reset the panic
-	 * state or turn off mutex locking, and so we can neither check
-	 * the panic state or acquire a mutex in the DB_ENV create path.
-	 *
-	 * The default is 32 8K pages rather than a flat 256K, because the
-	 * memory needed to hold 32 pages differs significantly between
-	 * systems.  For example, HP-UX with POSIX pthreads needs 88 bytes for
-	 * a POSIX pthread mutex and almost 200 bytes per buffer header, while
-	 * Solaris needs 24 and 52 bytes for the same structures.  The minimum
-	 * number of hash buckets is 37.  These contain a mutex also.
-	 */
-	dbenv->mp_ncache = 1;
-	dbenv->mp_bytes =
-	    32 * ((8 * 1024) + sizeof(BH)) + 37 * sizeof(DB_MPOOL_HASH);
-
-#ifdef HAVE_RPC
-	if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) {
-		/* RPC client: no region dump or name operations. */
-		dbenv->memp_dump_region = NULL;
-		dbenv->memp_nameop = NULL;
-
-		dbenv->set_cachesize = __dbcl_env_cachesize;
-		dbenv->set_mp_mmapsize = __dbcl_set_mp_mmapsize;
-		dbenv->memp_fcreate = __dbcl_memp_fcreate;
-		dbenv->memp_register = __dbcl_memp_register;
-		dbenv->memp_stat = __dbcl_memp_stat;
-		dbenv->memp_sync = __dbcl_memp_sync;
-		dbenv->memp_trickle = __dbcl_memp_trickle;
-	} else
-#endif
-	{
-		dbenv->memp_dump_region = __memp_dump_region;
-		dbenv->memp_nameop = __memp_nameop;
-
-		dbenv->set_cachesize = __memp_set_cachesize;
-		dbenv->set_mp_mmapsize = __memp_set_mp_mmapsize;
-		dbenv->memp_fcreate = __memp_fcreate;
-		dbenv->memp_register = __memp_register;
-		dbenv->memp_stat = __memp_stat;
-		dbenv->memp_sync = __memp_sync;
-		dbenv->memp_trickle = __memp_trickle;
-	}
-}
-
-/*
- * __memp_set_cachesize --
- *	Initialize the cache size.
- *
- *	gbytes/bytes give the total cache size and ncache the number of
- *	caches it is split across; an ncache of 0 or less is treated as 1.
- *	Must be called before the environment is opened.
- *
- *	Returns 0 on success, or EINVAL if an individual cache would exceed
- *	4GB.
- */
-static int
-__memp_set_cachesize(dbenv, gbytes, bytes, ncache)
-	DB_ENV *dbenv;
-	u_int32_t gbytes, bytes;
-	int ncache;
-{
-	ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_cachesize");
-
-	/*
-	 * Normalize the values.  A negative count is folded to 1 as well:
-	 * it would otherwise be converted to a huge unsigned divisor in the
-	 * arithmetic below and be stored as the cache count.
-	 */
-	if (ncache <= 0)
-		ncache = 1;
-
-	/*
-	 * You can only store 4GB-1 in an unsigned 32-bit value, so correct for
-	 * applications that specify 4GB cache sizes -- we know what they meant.
-	 */
-	if (gbytes / ncache == 4 && bytes == 0) {
-		--gbytes;
-		bytes = GIGABYTE - 1;
-	} else {
-		gbytes += bytes / GIGABYTE;
-		bytes %= GIGABYTE;
-	}
-
-	/* Avoid too-large cache sizes, they result in a region size of zero. */
-	if (gbytes / ncache > 4 || (gbytes / ncache == 4 && bytes != 0)) {
-		__db_err(dbenv, "individual cache size too large");
-		return (EINVAL);
-	}
-
-	/*
-	 * If the application requested less than 500Mb, increase the cachesize
-	 * by 25% and factor in the size of the hash buckets to account for our
-	 * overhead.  (I'm guessing caches over 500Mb are specifically sized,
-	 * that is, it's a large server and the application actually knows how
-	 * much memory is available.  We only document the 25% overhead number,
-	 * not the hash buckets, but I don't see a reason to confuse the issue,
-	 * it shouldn't matter to an application.)
-	 *
-	 * There is a minimum cache size, regardless.
-	 */
-	if (gbytes == 0) {
-		if (bytes < 500 * MEGABYTE)
-			bytes += (bytes / 4) + 37 * sizeof(DB_MPOOL_HASH);
-		if (bytes / ncache < DB_CACHESIZE_MIN)
-			bytes = ncache * DB_CACHESIZE_MIN;
-	}
-
-	dbenv->mp_gbytes = gbytes;
-	dbenv->mp_bytes = bytes;
-	dbenv->mp_ncache = ncache;
-
-	return (0);
-}
-
-/*
- * __memp_set_mp_mmapsize --
- *	Record the maximum mapped file size in the environment handle.
- *	Always returns 0.
- */
-static int
-__memp_set_mp_mmapsize(dbenv, mp_mmapsize)
-	DB_ENV *dbenv;
-	size_t mp_mmapsize;
-{
-	dbenv->mp_mmapsize = mp_mmapsize;
-
-	return (0);
-}
diff --git a/bdb/mp/mp_region.c b/bdb/mp/mp_region.c
deleted file mode 100644
index 06eca2f8646..00000000000
--- a/bdb/mp/mp_region.c
+++ /dev/null
@@ -1,466 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_region.c,v 11.49 2002/05/07 18:42:20 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-static int __mpool_init __P((DB_ENV *, DB_MPOOL *, int, int));
-#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
-static size_t __mpool_region_maint __P((REGINFO *));
-#endif
-
-/*
- * __memp_open --
- * Internal version of memp_open: only called from DB_ENV->open.
- *
- * PUBLIC: int __memp_open __P((DB_ENV *));
- */
-int
-__memp_open(dbenv)
- DB_ENV *dbenv;
-{
- DB_MPOOL *dbmp;
- MPOOL *mp;
- REGINFO reginfo;
- roff_t reg_size, *regids;
- u_int32_t i;
- int htab_buckets, ret;
-
- /* Figure out how big each cache region is. */
- reg_size = (dbenv->mp_gbytes / dbenv->mp_ncache) * GIGABYTE;
- reg_size += ((dbenv->mp_gbytes %
- dbenv->mp_ncache) * GIGABYTE) / dbenv->mp_ncache;
- reg_size += dbenv->mp_bytes / dbenv->mp_ncache;
-
- /*
- * Figure out how many hash buckets each region will have. Assume we
- * want to keep the hash chains with under 10 pages on each chain. We
- * don't know the pagesize in advance, and it may differ for different
- * files. Use a pagesize of 1K for the calculation -- we walk these
- * chains a lot, they must be kept short.
- */
- htab_buckets = __db_tablesize((reg_size / (1 * 1024)) / 10);
-
- /* Create and initialize the DB_MPOOL structure. */
- if ((ret = __os_calloc(dbenv, 1, sizeof(*dbmp), &dbmp)) != 0)
- return (ret);
- LIST_INIT(&dbmp->dbregq);
- TAILQ_INIT(&dbmp->dbmfq);
- dbmp->dbenv = dbenv;
-
- /* Join/create the first mpool region. */
- memset(&reginfo, 0, sizeof(REGINFO));
- reginfo.type = REGION_TYPE_MPOOL;
- reginfo.id = INVALID_REGION_ID;
- reginfo.mode = dbenv->db_mode;
- reginfo.flags = REGION_JOIN_OK;
- if (F_ISSET(dbenv, DB_ENV_CREATE))
- F_SET(&reginfo, REGION_CREATE_OK);
- if ((ret = __db_r_attach(dbenv, &reginfo, reg_size)) != 0)
- goto err;
-
- /*
- * If we created the region, initialize it. Create or join any
- * additional regions.
- */
- if (F_ISSET(&reginfo, REGION_CREATE)) {
- /*
- * We define how many regions there are going to be, allocate
- * the REGINFO structures and create them. Make sure we don't
- * clear the wrong entries on error.
- */
- dbmp->nreg = dbenv->mp_ncache;
- if ((ret = __os_calloc(dbenv,
- dbmp->nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0)
- goto err;
- /* Make sure we don't clear the wrong entries on error. */
- for (i = 0; i < dbmp->nreg; ++i)
- dbmp->reginfo[i].id = INVALID_REGION_ID;
- dbmp->reginfo[0] = reginfo;
-
- /* Initialize the first region. */
- if ((ret = __mpool_init(dbenv, dbmp, 0, htab_buckets)) != 0)
- goto err;
-
- /*
- * Create/initialize remaining regions and copy their IDs into
- * the first region.
- */
- mp = R_ADDR(dbmp->reginfo, dbmp->reginfo[0].rp->primary);
- regids = R_ADDR(dbmp->reginfo, mp->regids);
- for (i = 1; i < dbmp->nreg; ++i) {
- dbmp->reginfo[i].type = REGION_TYPE_MPOOL;
- dbmp->reginfo[i].id = INVALID_REGION_ID;
- dbmp->reginfo[i].mode = dbenv->db_mode;
- dbmp->reginfo[i].flags = REGION_CREATE_OK;
- if ((ret = __db_r_attach(
- dbenv, &dbmp->reginfo[i], reg_size)) != 0)
- goto err;
- if ((ret =
- __mpool_init(dbenv, dbmp, i, htab_buckets)) != 0)
- goto err;
- R_UNLOCK(dbenv, &dbmp->reginfo[i]);
-
- regids[i] = dbmp->reginfo[i].id;
- }
-
- R_UNLOCK(dbenv, dbmp->reginfo);
- } else {
- /*
- * Determine how many regions there are going to be, allocate
- * the REGINFO structures and fill in local copies of that
- * information.
- */
- mp = R_ADDR(&reginfo, reginfo.rp->primary);
- dbmp->nreg = mp->nreg;
- if ((ret = __os_calloc(dbenv,
- dbmp->nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0)
- goto err;
- /* Make sure we don't clear the wrong entries on error. */
- for (i = 0; i < dbmp->nreg; ++i)
- dbmp->reginfo[i].id = INVALID_REGION_ID;
- dbmp->reginfo[0] = reginfo;
-
- /*
- * We have to unlock the primary mpool region before we attempt
- * to join the additional mpool regions. If we don't, we can
- * deadlock. The scenario is that we hold the primary mpool
- * region lock. We then try to attach to an additional mpool
- * region, which requires the acquisition/release of the main
- * region lock (to search the list of regions). If another
- * thread of control already holds the main region lock and is
- * waiting on our primary mpool region lock, we'll deadlock.
- * See [#4696] for more information.
- */
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- /* Join remaining regions. */
- regids = R_ADDR(dbmp->reginfo, mp->regids);
- for (i = 1; i < dbmp->nreg; ++i) {
- dbmp->reginfo[i].type = REGION_TYPE_MPOOL;
- dbmp->reginfo[i].id = regids[i];
- dbmp->reginfo[i].mode = 0;
- dbmp->reginfo[i].flags = REGION_JOIN_OK;
- if ((ret = __db_r_attach(
- dbenv, &dbmp->reginfo[i], 0)) != 0)
- goto err;
- R_UNLOCK(dbenv, &dbmp->reginfo[i]);
- }
- }
-
- /* Set the local addresses for the regions. */
- for (i = 0; i < dbmp->nreg; ++i)
- dbmp->reginfo[i].primary =
- R_ADDR(&dbmp->reginfo[i], dbmp->reginfo[i].rp->primary);
-
- /* If the region is threaded, allocate a mutex to lock the handles. */
- if (F_ISSET(dbenv, DB_ENV_THREAD) &&
- (ret = __db_mutex_setup(dbenv, dbmp->reginfo, &dbmp->mutexp,
- MUTEX_ALLOC | MUTEX_THREAD)) != 0)
- goto err;
-
- dbenv->mp_handle = dbmp;
- return (0);
-
-err: if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) {
- if (F_ISSET(dbmp->reginfo, REGION_CREATE))
- ret = __db_panic(dbenv, ret);
-
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- for (i = 0; i < dbmp->nreg; ++i)
- if (dbmp->reginfo[i].id != INVALID_REGION_ID)
- (void)__db_r_detach(
- dbenv, &dbmp->reginfo[i], 0);
- __os_free(dbenv, dbmp->reginfo);
- }
- if (dbmp->mutexp != NULL)
- __db_mutex_free(dbenv, dbmp->reginfo, dbmp->mutexp);
- __os_free(dbenv, dbmp);
- return (ret);
-}
-
-/*
- * __mpool_init --
- * Initialize a MPOOL structure in shared memory.
- */
-static int
-__mpool_init(dbenv, dbmp, reginfo_off, htab_buckets)
- DB_ENV *dbenv;
- DB_MPOOL *dbmp;
- int reginfo_off, htab_buckets;
-{
- DB_MPOOL_HASH *htab;
- MPOOL *mp;
- REGINFO *reginfo;
-#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
- size_t maint_size;
-#endif
- int i, ret;
- void *p;
-
- mp = NULL;
-
- reginfo = &dbmp->reginfo[reginfo_off];
- if ((ret = __db_shalloc(reginfo->addr,
- sizeof(MPOOL), MUTEX_ALIGN, &reginfo->primary)) != 0)
- goto mem_err;
- reginfo->rp->primary = R_OFFSET(reginfo, reginfo->primary);
- mp = reginfo->primary;
- memset(mp, 0, sizeof(*mp));
-
-#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
- maint_size = __mpool_region_maint(reginfo);
- /* Allocate room for the maintenance info and initialize it. */
- if ((ret = __db_shalloc(reginfo->addr,
- sizeof(REGMAINT) + maint_size, 0, &p)) != 0)
- goto mem_err;
- __db_maintinit(reginfo, p, maint_size);
- mp->maint_off = R_OFFSET(reginfo, p);
-#endif
-
- if (reginfo_off == 0) {
- SH_TAILQ_INIT(&mp->mpfq);
-
- ZERO_LSN(mp->lsn);
-
- mp->nreg = dbmp->nreg;
- if ((ret = __db_shalloc(dbmp->reginfo[0].addr,
- dbmp->nreg * sizeof(int), 0, &p)) != 0)
- goto mem_err;
- mp->regids = R_OFFSET(dbmp->reginfo, p);
- }
-
- /* Allocate hash table space and initialize it. */
- if ((ret = __db_shalloc(reginfo->addr,
- htab_buckets * sizeof(DB_MPOOL_HASH), 0, &htab)) != 0)
- goto mem_err;
- mp->htab = R_OFFSET(reginfo, htab);
- for (i = 0; i < htab_buckets; i++) {
- if ((ret = __db_mutex_setup(dbenv,
- reginfo, &htab[i].hash_mutex,
- MUTEX_NO_RLOCK)) != 0)
- return (ret);
- SH_TAILQ_INIT(&htab[i].hash_bucket);
- htab[i].hash_page_dirty = htab[i].hash_priority = 0;
- }
- mp->htab_buckets = mp->stat.st_hash_buckets = htab_buckets;
-
- /*
- * Only the environment creator knows the total cache size, fill in
- * those statistics now.
- */
- mp->stat.st_gbytes = dbenv->mp_gbytes;
- mp->stat.st_bytes = dbenv->mp_bytes;
- return (0);
-
-mem_err:__db_err(dbenv, "Unable to allocate memory for mpool region");
- return (ret);
-}
-
-/*
- * __memp_dbenv_refresh --
- * Clean up after the mpool system on a close or failed open.
- *
- * PUBLIC: int __memp_dbenv_refresh __P((DB_ENV *));
- */
-int
-__memp_dbenv_refresh(dbenv)
- DB_ENV *dbenv;
-{
- DB_MPOOL *dbmp;
- DB_MPOOLFILE *dbmfp;
- DB_MPREG *mpreg;
- u_int32_t i;
- int ret, t_ret;
-
- ret = 0;
- dbmp = dbenv->mp_handle;
-
- /* Discard DB_MPREGs. */
- while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) {
- LIST_REMOVE(mpreg, q);
- __os_free(dbenv, mpreg);
- }
-
- /* Discard DB_MPOOLFILEs. */
- while ((dbmfp = TAILQ_FIRST(&dbmp->dbmfq)) != NULL)
- if ((t_ret = __memp_fclose_int(dbmfp, 0)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Discard the thread mutex. */
- if (dbmp->mutexp != NULL)
- __db_mutex_free(dbenv, dbmp->reginfo, dbmp->mutexp);
-
- /* Detach from the region(s). */
- for (i = 0; i < dbmp->nreg; ++i)
- if ((t_ret = __db_r_detach(
- dbenv, &dbmp->reginfo[i], 0)) != 0 && ret == 0)
- ret = t_ret;
-
- __os_free(dbenv, dbmp->reginfo);
- __os_free(dbenv, dbmp);
-
- dbenv->mp_handle = NULL;
- return (ret);
-}
-
-#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
-/*
- * __mpool_region_maint --
- * Return the amount of space needed for region maintenance info.
- *
- */
-static size_t
-__mpool_region_maint(infop)
- REGINFO *infop;
-{
- size_t s;
- int numlocks;
-
- /*
- * For mutex maintenance we need one mutex per possible page.
- * Compute the maximum number of pages this cache can have.
- * Also add in an mpool mutex and mutexes for all dbenv and db
- * handles.
- */
- numlocks = ((infop->rp->size / DB_MIN_PGSIZE) + 1);
- numlocks += DB_MAX_HANDLES;
- s = sizeof(roff_t) * numlocks;
- return (s);
-}
-#endif
-
-/*
- * __mpool_region_destroy
- * Destroy any region maintenance info.
- *
- * PUBLIC: void __mpool_region_destroy __P((DB_ENV *, REGINFO *));
- */
-void
-__mpool_region_destroy(dbenv, infop)
- DB_ENV *dbenv;
- REGINFO *infop;
-{
- __db_shlocks_destroy(infop, (REGMAINT *)R_ADDR(infop,
- ((MPOOL *)R_ADDR(infop, infop->rp->primary))->maint_off));
-
- COMPQUIET(dbenv, NULL);
- COMPQUIET(infop, NULL);
-}
-
-/*
- * __memp_nameop
- * Remove or rename a file in the pool.
- *
- * PUBLIC: int __memp_nameop __P((DB_ENV *,
- * PUBLIC: u_int8_t *, const char *, const char *, const char *));
- *
- * XXX
- * Undocumented interface: DB private.
- */
-int
-__memp_nameop(dbenv, fileid, newname, fullold, fullnew)
- DB_ENV *dbenv;
- u_int8_t *fileid;
- const char *newname, *fullold, *fullnew;
-{
- DB_MPOOL *dbmp;
- MPOOL *mp;
- MPOOLFILE *mfp;
- roff_t newname_off;
- int locked, ret;
- void *p;
-
- locked = 0;
- dbmp = NULL;
-
- if (!MPOOL_ON(dbenv))
- goto fsop;
-
- dbmp = dbenv->mp_handle;
- mp = dbmp->reginfo[0].primary;
-
- /*
- * Remove or rename a file that the mpool might know about. We assume
- * that the fop layer has the file locked for exclusive access, so we
- * don't worry about locking except for the mpool mutexes. Checkpoint
- * can happen at any time, independent of file locking, so we have to
- * do the actual unlink or rename system call to avoid any race.
- *
- * If this is a rename, allocate first, because we can't recursively
- * grab the region lock.
- */
- if (newname == NULL)
- p = NULL;
- else {
- if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
- NULL, strlen(newname) + 1, &newname_off, &p)) != 0)
- return (ret);
- memcpy(p, newname, strlen(newname) + 1);
- }
-
- locked = 1;
- R_LOCK(dbenv, dbmp->reginfo);
-
- /*
- * Find the file -- if mpool doesn't know about this file, that's not
- * an error-- we may not have it open.
- */
- for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
- mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
- /* Ignore non-active files. */
- if (F_ISSET(mfp, MP_DEADFILE | MP_TEMP))
- continue;
-
- /* Ignore non-matching files. */
- if (memcmp(fileid, R_ADDR(
- dbmp->reginfo, mfp->fileid_off), DB_FILE_ID_LEN) != 0)
- continue;
-
- /* If newname is NULL, we're removing the file. */
- if (newname == NULL) {
- MUTEX_LOCK(dbenv, &mfp->mutex);
- MPOOLFILE_IGNORE(mfp);
- MUTEX_UNLOCK(dbenv, &mfp->mutex);
- } else {
- /*
- * Else, it's a rename. We've allocated memory
- * for the new name. Swap it with the old one.
- */
- p = R_ADDR(dbmp->reginfo, mfp->path_off);
- mfp->path_off = newname_off;
- }
- break;
- }
-
- /* Delete the memory we no longer need. */
- if (p != NULL)
- __db_shalloc_free(dbmp->reginfo[0].addr, p);
-
-fsop: if (newname == NULL)
- (void)__os_unlink(dbenv, fullold);
- else
- (void)__os_rename(dbenv, fullold, fullnew, 1);
-
- if (locked)
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- return (0);
-}
diff --git a/bdb/mp/mp_register.c b/bdb/mp/mp_register.c
deleted file mode 100644
index 46eefad986f..00000000000
--- a/bdb/mp/mp_register.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_register.c,v 11.21 2002/03/27 04:32:27 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-/*
- * memp_register --
- * Register a file type's pgin, pgout routines.
- *
- * PUBLIC: int __memp_register __P((DB_ENV *, int,
- * PUBLIC: int (*)(DB_ENV *, db_pgno_t, void *, DBT *),
- * PUBLIC: int (*)(DB_ENV *, db_pgno_t, void *, DBT *)));
- */
-int
-__memp_register(dbenv, ftype, pgin, pgout)
- DB_ENV *dbenv;
- int ftype;
- int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *));
- int (*pgout) __P((DB_ENV *, db_pgno_t, void *, DBT *));
-{
- DB_MPOOL *dbmp;
- DB_MPREG *mpreg;
- int ret;
-
- PANIC_CHECK(dbenv);
- ENV_REQUIRES_CONFIG(dbenv,
- dbenv->mp_handle, "DB_ENV->memp_register", DB_INIT_MPOOL);
-
- dbmp = dbenv->mp_handle;
-
- /*
- * Chances are good that the item has already been registered, as the
- * DB access methods are the folks that call this routine. If already
- * registered, just update the entry, although it's probably unchanged.
- */
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
- for (mpreg = LIST_FIRST(&dbmp->dbregq);
- mpreg != NULL; mpreg = LIST_NEXT(mpreg, q))
- if (mpreg->ftype == ftype) {
- mpreg->pgin = pgin;
- mpreg->pgout = pgout;
- break;
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
- if (mpreg != NULL)
- return (0);
-
- /* New entry. */
- if ((ret = __os_malloc(dbenv, sizeof(DB_MPREG), &mpreg)) != 0)
- return (ret);
-
- mpreg->ftype = ftype;
- mpreg->pgin = pgin;
- mpreg->pgout = pgout;
-
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
- LIST_INSERT_HEAD(&dbmp->dbregq, mpreg, q);
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-
- return (0);
-}
diff --git a/bdb/mp/mp_stat.c b/bdb/mp/mp_stat.c
deleted file mode 100644
index 12e72b91d70..00000000000
--- a/bdb/mp/mp_stat.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_stat.c,v 11.51 2002/08/06 06:13:47 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/db_am.h"
-#include "dbinc/mp.h"
-
-static void __memp_dumpcache __P((DB_ENV *,
- DB_MPOOL *, REGINFO *, size_t *, FILE *, u_int32_t));
-static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *));
-static void __memp_stat_wait __P((REGINFO *, MPOOL *, DB_MPOOL_STAT *, int));
-
-/*
- * __memp_stat --
- * Display MPOOL statistics.
- *
- * PUBLIC: int __memp_stat
- * PUBLIC: __P((DB_ENV *, DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, u_int32_t));
- */
-int
-__memp_stat(dbenv, gspp, fspp, flags)
- DB_ENV *dbenv;
- DB_MPOOL_STAT **gspp;
- DB_MPOOL_FSTAT ***fspp;
- u_int32_t flags;
-{
- DB_MPOOL *dbmp;
- DB_MPOOL_FSTAT **tfsp, *tstruct;
- DB_MPOOL_STAT *sp;
- MPOOL *c_mp, *mp;
- MPOOLFILE *mfp;
- size_t len, nlen, pagesize;
- u_int32_t pages, i;
- int ret;
- char *name, *tname;
-
- PANIC_CHECK(dbenv);
- ENV_REQUIRES_CONFIG(dbenv,
- dbenv->mp_handle, "memp_stat", DB_INIT_MPOOL);
-
- if ((ret = __db_fchk(dbenv,
- "DB_ENV->memp_stat", flags, DB_STAT_CLEAR)) != 0)
- return (ret);
-
- dbmp = dbenv->mp_handle;
- mp = dbmp->reginfo[0].primary;
-
- /* Global statistics. */
- if (gspp != NULL) {
- *gspp = NULL;
-
- if ((ret = __os_umalloc(dbenv, sizeof(**gspp), gspp)) != 0)
- return (ret);
- memset(*gspp, 0, sizeof(**gspp));
- sp = *gspp;
-
- /*
- * Initialization and information that is not maintained on
- * a per-cache basis.
- */
- c_mp = dbmp->reginfo[0].primary;
- sp->st_gbytes = c_mp->stat.st_gbytes;
- sp->st_bytes = c_mp->stat.st_bytes;
- sp->st_ncache = dbmp->nreg;
- sp->st_regsize = dbmp->reginfo[0].rp->size;
-
- /* Walk the cache list and accumulate the global information. */
- for (i = 0; i < mp->nreg; ++i) {
- c_mp = dbmp->reginfo[i].primary;
-
- sp->st_map += c_mp->stat.st_map;
- sp->st_cache_hit += c_mp->stat.st_cache_hit;
- sp->st_cache_miss += c_mp->stat.st_cache_miss;
- sp->st_page_create += c_mp->stat.st_page_create;
- sp->st_page_in += c_mp->stat.st_page_in;
- sp->st_page_out += c_mp->stat.st_page_out;
- sp->st_ro_evict += c_mp->stat.st_ro_evict;
- sp->st_rw_evict += c_mp->stat.st_rw_evict;
- sp->st_page_trickle += c_mp->stat.st_page_trickle;
- sp->st_pages += c_mp->stat.st_pages;
- /*
- * st_page_dirty calculated by __memp_stat_hash
- * st_page_clean calculated here
- */
- __memp_stat_hash(
- &dbmp->reginfo[i], c_mp, &sp->st_page_dirty);
- sp->st_page_clean = sp->st_pages - sp->st_page_dirty;
- sp->st_hash_buckets += c_mp->stat.st_hash_buckets;
- sp->st_hash_searches += c_mp->stat.st_hash_searches;
- sp->st_hash_longest += c_mp->stat.st_hash_longest;
- sp->st_hash_examined += c_mp->stat.st_hash_examined;
- /*
- * st_hash_nowait calculated by __memp_stat_wait
- * st_hash_wait
- */
- __memp_stat_wait(&dbmp->reginfo[i], c_mp, sp, flags);
- sp->st_region_nowait +=
- dbmp->reginfo[i].rp->mutex.mutex_set_nowait;
- sp->st_region_wait +=
- dbmp->reginfo[i].rp->mutex.mutex_set_wait;
- sp->st_alloc += c_mp->stat.st_alloc;
- sp->st_alloc_buckets += c_mp->stat.st_alloc_buckets;
- if (sp->st_alloc_max_buckets <
- c_mp->stat.st_alloc_max_buckets)
- sp->st_alloc_max_buckets =
- c_mp->stat.st_alloc_max_buckets;
- sp->st_alloc_pages += c_mp->stat.st_alloc_pages;
- if (sp->st_alloc_max_pages <
- c_mp->stat.st_alloc_max_pages)
- sp->st_alloc_max_pages =
- c_mp->stat.st_alloc_max_pages;
-
- if (LF_ISSET(DB_STAT_CLEAR)) {
- dbmp->reginfo[i].rp->mutex.mutex_set_wait = 0;
- dbmp->reginfo[i].rp->mutex.mutex_set_nowait = 0;
- pages = c_mp->stat.st_pages;
- memset(&c_mp->stat, 0, sizeof(c_mp->stat));
- c_mp->stat.st_hash_buckets = c_mp->htab_buckets;
- c_mp->stat.st_pages = pages;
- }
- }
-
- /*
- * We have duplicate statistics fields in per-file structures
- * and the cache. The counters are only incremented in the
- * per-file structures, except if a file is flushed from the
- * mpool, at which time we copy its information into the cache
- * statistics. We added the cache information above, now we
- * add the per-file information.
- */
- R_LOCK(dbenv, dbmp->reginfo);
- for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
- mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
- sp->st_map += mfp->stat.st_map;
- sp->st_cache_hit += mfp->stat.st_cache_hit;
- sp->st_cache_miss += mfp->stat.st_cache_miss;
- sp->st_page_create += mfp->stat.st_page_create;
- sp->st_page_in += mfp->stat.st_page_in;
- sp->st_page_out += mfp->stat.st_page_out;
- if (fspp == NULL && LF_ISSET(DB_STAT_CLEAR)) {
- pagesize = mfp->stat.st_pagesize;
- memset(&mfp->stat, 0, sizeof(mfp->stat));
- mfp->stat.st_pagesize = pagesize;
- }
- }
- R_UNLOCK(dbenv, dbmp->reginfo);
- }
-
- /* Per-file statistics. */
- if (fspp != NULL) {
- *fspp = NULL;
-
- /* Count the MPOOLFILE structures. */
- R_LOCK(dbenv, dbmp->reginfo);
- for (i = 0, len = 0,
- mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
- mfp != NULL;
- ++i, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
- len += sizeof(DB_MPOOL_FSTAT *) +
- sizeof(DB_MPOOL_FSTAT) +
- strlen(__memp_fns(dbmp, mfp)) + 1;
- len += sizeof(DB_MPOOL_FSTAT *); /* Trailing NULL */
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- if (i == 0)
- return (0);
-
- /* Allocate space */
- if ((ret = __os_umalloc(dbenv, len, fspp)) != 0)
- return (ret);
-
- /*
- * Build each individual entry. We assume that an array of
- * pointers are aligned correctly to be followed by an array
- * of structures, which should be safe (in this particular
- * case, the first element of the structure is a pointer, so
- * we're doubly safe). The array is followed by space for
- * the text file names.
- *
- * Add 1 to i because we need to skip over the NULL.
- */
- tfsp = *fspp;
- tstruct = (DB_MPOOL_FSTAT *)(tfsp + i + 1);
- tname = (char *)(tstruct + i);
-
- /*
- * Files may have been opened since we counted, don't walk
- * off the end of the allocated space.
- */
- R_LOCK(dbenv, dbmp->reginfo);
- for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
- mfp != NULL && i-- > 0;
- ++tfsp, ++tstruct, tname += nlen,
- mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
- name = __memp_fns(dbmp, mfp);
- nlen = strlen(name) + 1;
- *tfsp = tstruct;
- *tstruct = mfp->stat;
- if (LF_ISSET(DB_STAT_CLEAR)) {
- pagesize = mfp->stat.st_pagesize;
- memset(&mfp->stat, 0, sizeof(mfp->stat));
- mfp->stat.st_pagesize = pagesize;
- }
- tstruct->file_name = tname;
- memcpy(tname, name, nlen);
- }
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- *tfsp = NULL;
- }
- return (0);
-}
-
-#define FMAP_ENTRIES 200 /* Files we map. */
-
-#define MPOOL_DUMP_HASH 0x01 /* Debug hash chains. */
-#define MPOOL_DUMP_MEM 0x04 /* Debug region memory. */
-#define MPOOL_DUMP_ALL 0x07 /* Debug all. */
-
-/*
- * __memp_dump_region --
- * Display MPOOL structures.
- *
- * PUBLIC: int __memp_dump_region __P((DB_ENV *, char *, FILE *));
- */
-int
-__memp_dump_region(dbenv, area, fp)
- DB_ENV *dbenv;
- char *area;
- FILE *fp;
-{
- static const FN fn[] = {
- { MP_CAN_MMAP, "mmapped" },
- { MP_DEADFILE, "dead" },
- { MP_DIRECT, "no buffer" },
- { MP_EXTENT, "extent" },
- { MP_TEMP, "temporary" },
- { MP_UNLINK, "unlink" },
- { 0, NULL }
- };
- DB_MPOOL *dbmp;
- DB_MPOOLFILE *dbmfp;
- MPOOL *mp;
- MPOOLFILE *mfp;
- size_t fmap[FMAP_ENTRIES + 1];
- u_int32_t i, flags;
- int cnt;
- u_int8_t *p;
-
- PANIC_CHECK(dbenv);
- ENV_REQUIRES_CONFIG(dbenv,
- dbenv->mp_handle, "memp_dump_region", DB_INIT_MPOOL);
-
- dbmp = dbenv->mp_handle;
-
- /* Make it easy to call from the debugger. */
- if (fp == NULL)
- fp = stderr;
-
- for (flags = 0; *area != '\0'; ++area)
- switch (*area) {
- case 'A':
- LF_SET(MPOOL_DUMP_ALL);
- break;
- case 'h':
- LF_SET(MPOOL_DUMP_HASH);
- break;
- case 'm':
- LF_SET(MPOOL_DUMP_MEM);
- break;
- }
-
- mp = dbmp->reginfo[0].primary;
-
- /* Display MPOOL structures. */
- (void)fprintf(fp, "%s\nPool (region addr 0x%lx)\n",
- DB_LINE, P_TO_ULONG(dbmp->reginfo[0].addr));
-
- /* Display the MPOOLFILE structures. */
- R_LOCK(dbenv, dbmp->reginfo);
- for (cnt = 0, mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
- mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) {
- (void)fprintf(fp, "File #%d: %s: pagesize %lu\n", cnt + 1,
- __memp_fns(dbmp, mfp), (u_long)mfp->stat.st_pagesize);
- (void)fprintf(fp, "\t type %ld; ref %lu; blocks %lu; last %lu;",
- (long)mfp->ftype, (u_long)mfp->mpf_cnt,
- (u_long)mfp->block_cnt, (u_long)mfp->last_pgno);
- __db_prflags(mfp->flags, fn, fp);
-
- (void)fprintf(fp, "\n\t UID: ");
- p = R_ADDR(dbmp->reginfo, mfp->fileid_off);
- for (i = 0; i < DB_FILE_ID_LEN; ++i, ++p) {
- (void)fprintf(fp, "%x", (u_int)*p);
- if (i < DB_FILE_ID_LEN - 1)
- (void)fprintf(fp, " ");
- }
- (void)fprintf(fp, "\n");
- if (cnt < FMAP_ENTRIES)
- fmap[cnt] = R_OFFSET(dbmp->reginfo, mfp);
- }
- R_UNLOCK(dbenv, dbmp->reginfo);
-
- MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
- for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
- dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) {
- (void)fprintf(fp, "File #%d: %s: per-process, %s\n",
- cnt + 1, __memp_fn(dbmfp),
- F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
- if (cnt < FMAP_ENTRIES)
- fmap[cnt] = R_OFFSET(dbmp->reginfo, mfp);
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
- if (cnt < FMAP_ENTRIES)
- fmap[cnt] = INVALID_ROFF;
- else
- fmap[FMAP_ENTRIES] = INVALID_ROFF;
-
- /* Dump the memory pools. */
- for (i = 0; i < mp->nreg; ++i) {
- (void)fprintf(fp, "%s\nCache #%d:\n", DB_LINE, i + 1);
- __memp_dumpcache(
- dbenv, dbmp, &dbmp->reginfo[i], fmap, fp, flags);
- }
-
- /* Flush in case we're debugging. */
- (void)fflush(fp);
-
- return (0);
-}
-
-/*
- * __memp_dumpcache --
- * Display statistics for a cache.
- */
-static void
-__memp_dumpcache(dbenv, dbmp, reginfo, fmap, fp, flags)
- DB_ENV *dbenv;
- DB_MPOOL *dbmp;
- REGINFO *reginfo;
- size_t *fmap;
- FILE *fp;
- u_int32_t flags;
-{
- BH *bhp;
- DB_MPOOL_HASH *hp;
- MPOOL *c_mp;
- int bucket;
-
- c_mp = reginfo->primary;
-
- /* Display the hash table list of BH's. */
- if (LF_ISSET(MPOOL_DUMP_HASH)) {
- (void)fprintf(fp,
- "%s\nBH hash table (%lu hash slots)\nbucket (priority):\n",
- DB_LINE, (u_long)c_mp->htab_buckets);
- (void)fprintf(fp,
- "\tpageno, file, ref, address [LSN] priority\n");
-
- for (hp = R_ADDR(reginfo, c_mp->htab),
- bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
- if ((bhp =
- SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) != NULL)
- (void)fprintf(fp, "%lu (%u):\n",
- (u_long)bucket, hp->hash_priority);
- for (; bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
- __memp_pbh(dbmp, bhp, fmap, fp);
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- }
- }
-
- /* Dump the memory pool. */
- if (LF_ISSET(MPOOL_DUMP_MEM))
- __db_shalloc_dump(reginfo->addr, fp);
-}
-
-/*
- * __memp_pbh --
- * Display a BH structure.
- */
-static void
-__memp_pbh(dbmp, bhp, fmap, fp)
- DB_MPOOL *dbmp;
- BH *bhp;
- size_t *fmap;
- FILE *fp;
-{
- static const FN fn[] = {
- { BH_CALLPGIN, "callpgin" },
- { BH_DIRTY, "dirty" },
- { BH_DIRTY_CREATE, "created" },
- { BH_DISCARD, "discard" },
- { BH_LOCKED, "locked" },
- { BH_TRASH, "trash" },
- { 0, NULL }
- };
- int i;
-
- for (i = 0; i < FMAP_ENTRIES; ++i)
- if (fmap[i] == INVALID_ROFF || fmap[i] == bhp->mf_offset)
- break;
-
- if (fmap[i] == INVALID_ROFF)
- (void)fprintf(fp, "\t%5lu, %lu, %2lu, %8lu [%lu,%lu] %lu",
- (u_long)bhp->pgno, (u_long)bhp->mf_offset,
- (u_long)bhp->ref, (u_long)R_OFFSET(dbmp->reginfo, bhp),
- (u_long)LSN(bhp->buf).file, (u_long)LSN(bhp->buf).offset,
- (u_long)bhp->priority);
- else
- (void)fprintf(fp, "\t%5lu, #%d, %2lu, %8lu [%lu,%lu] %lu",
- (u_long)bhp->pgno, i + 1,
- (u_long)bhp->ref, (u_long)R_OFFSET(dbmp->reginfo, bhp),
- (u_long)LSN(bhp->buf).file, (u_long)LSN(bhp->buf).offset,
- (u_long)bhp->priority);
-
- __db_prflags(bhp->flags, fn, fp);
-
- (void)fprintf(fp, "\n");
-}
-
-/*
- * __memp_stat_hash --
- * Total hash bucket stats (other than mutex wait) into the region.
- *
- * PUBLIC: void __memp_stat_hash __P((REGINFO *, MPOOL *, u_int32_t *));
- */
-void
-__memp_stat_hash(reginfo, mp, dirtyp)
- REGINFO *reginfo;
- MPOOL *mp;
- u_int32_t *dirtyp;
-{
- DB_MPOOL_HASH *hp;
- u_int32_t dirty;
- int i;
-
- hp = R_ADDR(reginfo, mp->htab);
- for (i = 0, dirty = 0; i < mp->htab_buckets; i++, hp++)
- dirty += hp->hash_page_dirty;
- *dirtyp = dirty;
-}
-
-/*
- * __memp_stat_wait --
- * Total hash bucket wait stats into the region.
- */
-static void
-__memp_stat_wait(reginfo, mp, mstat, flags)
- REGINFO *reginfo;
- MPOOL *mp;
- DB_MPOOL_STAT *mstat;
- int flags;
-{
- DB_MPOOL_HASH *hp;
- DB_MUTEX *mutexp;
- int i;
-
- mstat->st_hash_max_wait = 0;
- hp = R_ADDR(reginfo, mp->htab);
- for (i = 0; i < mp->htab_buckets; i++, hp++) {
- mutexp = &hp->hash_mutex;
- mstat->st_hash_nowait += mutexp->mutex_set_nowait;
- mstat->st_hash_wait += mutexp->mutex_set_wait;
- if (mutexp->mutex_set_wait > mstat->st_hash_max_wait)
- mstat->st_hash_max_wait = mutexp->mutex_set_wait;
-
- if (LF_ISSET(DB_STAT_CLEAR)) {
- mutexp->mutex_set_wait = 0;
- mutexp->mutex_set_nowait = 0;
- }
- }
-}
diff --git a/bdb/mp/mp_sync.c b/bdb/mp/mp_sync.c
deleted file mode 100644
index 03b42208b39..00000000000
--- a/bdb/mp/mp_sync.c
+++ /dev/null
@@ -1,627 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_sync.c,v 11.64 2002/08/25 16:00:27 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <stdlib.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-/*
- * BH_TRACK --
- * One slot in the array __memp_sync_int builds of buffers it wants to
- * flush. The (track_off, track_pgno) pair is used both to sort the array
- * (see __bhcmp) and to find the buffer again in its hash bucket.
- */
-typedef struct {
- DB_MPOOL_HASH *track_hp; /* Hash bucket; NULL once the slot is done. */
-
- roff_t track_off; /* Page file offset (copy of bhp->mf_offset). */
- db_pgno_t track_pgno; /* Page number (copy of bhp->pgno). */
-} BH_TRACK;
-
-static int __bhcmp __P((const void *, const void *));
-static int __memp_close_flush_files __P((DB_ENV *, DB_MPOOL *));
-static int __memp_sync_files __P((DB_ENV *, DB_MPOOL *));
-
-/*
- * __memp_sync --
- *	Mpool sync function: flush the cache, optionally relative to an LSN.
- *
- *	With a NULL lsnp the whole cache is flushed (this does not require a
- *	log subsystem).  With a non-NULL lsnp the log subsystem must be
- *	configured; if the pool's recorded LSN is already at or past *lsnp,
- *	*lsnp is overwritten with the pool's LSN and nothing is flushed.
- *	Otherwise the cache is flushed and, on success, the pool's LSN is
- *	advanced to *lsnp if that is larger.
- *
- *	Returns 0 on success or the error from __memp_sync_int.
- *
- * PUBLIC: int __memp_sync __P((DB_ENV *, DB_LSN *));
- */
-int
-__memp_sync(dbenv, lsnp)
-	DB_ENV *dbenv;
-	DB_LSN *lsnp;
-{
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	int flushed, ret;
-
-	PANIC_CHECK(dbenv);
-	ENV_REQUIRES_CONFIG(dbenv,
-	    dbenv->mp_handle, "memp_sync", DB_INIT_MPOOL);
-
-	/* An LSN only makes sense if there is a log to compare against. */
-	if (lsnp != NULL)
-		ENV_REQUIRES_CONFIG(dbenv,
-		    dbenv->lg_handle, "memp_sync", DB_INIT_LOG);
-
-	dbmp = dbenv->mp_handle;
-	mp = dbmp->reginfo[0].primary;
-
-	/* Report back, without flushing, if we are already that far along. */
-	if (lsnp != NULL) {
-		R_LOCK(dbenv, dbmp->reginfo);
-		flushed = log_compare(lsnp, &mp->lsn) <= 0;
-		if (flushed)
-			*lsnp = mp->lsn;
-		R_UNLOCK(dbenv, dbmp->reginfo);
-
-		if (flushed)
-			return (0);
-	}
-
-	ret = __memp_sync_int(dbenv, NULL, 0, DB_SYNC_CACHE, NULL);
-	if (ret != 0 || lsnp == NULL)
-		return (ret);
-
-	/* Remember how far the cache has now been flushed. */
-	R_LOCK(dbenv, dbmp->reginfo);
-	if (log_compare(lsnp, &mp->lsn) > 0)
-		mp->lsn = *lsnp;
-	R_UNLOCK(dbenv, dbmp->reginfo);
-
-	return (0);
-}
-
-/*
- * __memp_fsync --
- *	Mpool file sync function: flush the buffers of a single file handle.
- *
- *	Returns 0 immediately for handles flagged MP_READONLY and for files
- *	flagged MP_TEMP; otherwise returns the result of __memp_sync_int
- *	called with DB_SYNC_FILE.
- *
- * PUBLIC: int __memp_fsync __P((DB_MPOOLFILE *));
- */
-int
-__memp_fsync(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	DB_ENV *dbenv;
-
-	dbenv = dbmfp->dbmp->dbenv;
-
-	PANIC_CHECK(dbenv);
-
-	/*
-	 * Nothing to do when the handle has no descriptor open for writing
-	 * or when the underlying file is a temporary.
-	 */
-	if (F_ISSET(dbmfp, MP_READONLY) || F_ISSET(dbmfp->mfp, MP_TEMP))
-		return (0);
-
-	return (__memp_sync_int(dbenv, dbmfp, 0, DB_SYNC_FILE, NULL));
-}
-
-/*
- * __mp_xxx_fh --
- *	Return a file descriptor for DB 1.85 compatibility locking.
- *
- *	*fhp is always set to the handle's DB_FH.  If that DB_FH is not yet
- *	flagged DB_FH_VALID, the file is synced through __memp_sync_int and
- *	that call's result is returned; otherwise 0 is returned.
- *
- * PUBLIC: int __mp_xxx_fh __P((DB_MPOOLFILE *, DB_FH **));
- */
-int
-__mp_xxx_fh(dbmfp, fhp)
-	DB_MPOOLFILE *dbmfp;
-	DB_FH **fhp;
-{
-	DB_FH *fh;
-
-	/*
-	 * This is a deliberate layering violation that exists ONLY to
-	 * support the DB 1.85 DB->fd call.
-	 *
-	 * Unlike memp_fsync(3), neither MP_READONLY nor MP_TEMP is tested
-	 * here.  MP_READONLY is uninteresting: either a descriptor already
-	 * exists (the database file was opened for reading) or the handle
-	 * isn't readonly (the database was created, which needs write
-	 * privileges).  MP_TEMP is uninteresting because we want to write
-	 * to the backing file regardless, so that there is a descriptor to
-	 * hand back.
-	 */
-	fh = dbmfp->fhp;
-	*fhp = fh;
-
-	/* Sync the file to disk, creating it as necessary. */
-	if (!F_ISSET(fh, DB_FH_VALID))
-		return (__memp_sync_int(
-		    dbmfp->dbmp->dbenv, dbmfp, 0, DB_SYNC_FILE, NULL));
-
-	return (0);
-}
-
-/*
- * __memp_sync_int --
- * Mpool sync internal function.
- *
- * Works in two phases: first every hash bucket of every cache region is
- * scanned and the buffers of interest are recorded in a BH_TRACK array;
- * then the array is sorted by file and page number and each recorded
- * buffer is looked up again, waited for, and written if still dirty.
- *
- * dbenv: environment; dbenv->mp_handle supplies the pool.
- * dbmfp: if non-NULL, only buffers belonging to dbmfp->mfp are tracked,
- * and that handle alone is fsync'd at the end.
- * ar_max: initial capacity of the tracking array; 0 means
- * nreg * htab_buckets of the primary region. The array is
- * doubled whenever it fills.
- * op: DB_SYNC_CACHE, DB_SYNC_FILE or DB_SYNC_TRICKLE.
- * wrotep: if non-NULL, receives the number of buffers successfully
- * written. It is NOT stored to when the initial array allocation
- * fails, because that path returns directly.
- *
- * Returns 0 on success, otherwise the error that stopped the sync. A
- * failed page write is not treated as an error for DB_SYNC_FILE.
- *
- * PUBLIC: int __memp_sync_int
- * PUBLIC: __P((DB_ENV *, DB_MPOOLFILE *, int, db_sync_op, int *));
- */
-int
-__memp_sync_int(dbenv, dbmfp, ar_max, op, wrotep)
- DB_ENV *dbenv;
- DB_MPOOLFILE *dbmfp;
- int ar_max, *wrotep;
- db_sync_op op;
-{
- BH *bhp;
- BH_TRACK *bharray;
- DB_MPOOL *dbmp;
- DB_MPOOL_HASH *hp;
- DB_MUTEX *mutexp;
- MPOOL *c_mp, *mp;
- MPOOLFILE *mfp;
- u_int32_t n_cache;
- int ar_cnt, hb_lock, i, pass, remaining, ret, t_ret, wait_cnt, wrote;
-
- dbmp = dbenv->mp_handle;
- mp = dbmp->reginfo[0].primary;
- pass = wrote = 0;
-
- /*
- * If the caller does not specify how many pages assume one
- * per bucket.
- */
- if (ar_max == 0)
- ar_max = mp->nreg * mp->htab_buckets;
-
- /* Early return: nothing to free yet, and *wrotep is left untouched. */
- if ((ret =
- __os_malloc(dbenv, ar_max * sizeof(BH_TRACK), &bharray)) != 0)
- return (ret);
-
- /*
- * Walk each cache's list of buffers and mark all dirty buffers to be
- * written and all pinned buffers to be potentially written, depending
- * on our flags.
- */
- for (ar_cnt = 0, n_cache = 0; n_cache < mp->nreg; ++n_cache) {
- c_mp = dbmp->reginfo[n_cache].primary;
-
- hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
- for (i = 0; i < c_mp->htab_buckets; i++, hp++) {
- /*
- * We can check for empty buckets before locking as we
- * only care if the pointer is zero or non-zero. We
- * can ignore empty buckets because we only need write
- * buffers that were dirty before we started.
- */
- if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
- continue;
-
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
- for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
- /* Always ignore unreferenced, clean pages. */
- if (bhp->ref == 0 && !F_ISSET(bhp, BH_DIRTY))
- continue;
-
- /*
- * Checkpoints have to wait on all pinned pages,
- * as pages may be marked dirty when returned to
- * the cache.
- *
- * File syncs only wait on pages both pinned and
- * dirty. (We don't care if pages are marked
- * dirty when returned to the cache, that means
- * there's another writing thread and flushing
- * the cache for this handle is meaningless.)
- */
- if (op == DB_SYNC_FILE &&
- !F_ISSET(bhp, BH_DIRTY))
- continue;
-
- mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
-
- /*
- * Ignore temporary files -- this means you
- * can't even flush temporary files by handle.
- * (Checkpoint doesn't require temporary files
- * be flushed and the underlying buffer write
- * routine may not be able to write it
- * anyway.)
- */
- if (F_ISSET(mfp, MP_TEMP))
- continue;
-
- /*
- * If we're flushing a specific file, see if
- * this page is from that file.
- */
- if (dbmfp != NULL && mfp != dbmfp->mfp)
- continue;
-
- /*
- * Ignore files that aren't involved in DB's
- * transactional operations during checkpoints.
- */
- if (dbmfp == NULL && mfp->lsn_off == -1)
- continue;
-
- /* Track the buffer, we want it. */
- bharray[ar_cnt].track_hp = hp;
- bharray[ar_cnt].track_pgno = bhp->pgno;
- bharray[ar_cnt].track_off = bhp->mf_offset;
- ar_cnt++;
-
- /*
- * Grow the array as soon as it is full so the next
- * store always has room. On failure, leave the
- * bucket loop so the mutex is released before
- * jumping to the error exit.
- */
- if (ar_cnt >= ar_max) {
- if ((ret = __os_realloc(dbenv,
- (ar_max * 2) * sizeof(BH_TRACK),
- &bharray)) != 0)
- break;
- ar_max *= 2;
- }
- }
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-
- if (ret != 0)
- goto err;
- }
- }
-
- /* If there are no buffers to write, we're done. */
- if (ar_cnt == 0)
- goto done;
-
- /*
- * Write the buffers in file/page order, trying to reduce seeks by the
- * filesystem and, when pages are smaller than filesystem block sizes,
- * reduce the actual number of writes.
- */
- if (ar_cnt > 1)
- qsort(bharray, ar_cnt, sizeof(BH_TRACK), __bhcmp);
-
- /*
- * If we're trickling buffers, only write enough to reach the correct
- * percentage for this region. We may not write enough if the dirty
- * buffers have an unbalanced distribution among the regions, but that
- * seems unlikely.
- *
- * NOTE(review): ar_max is doubled by the collection loop above each
- * time the array fills, so at this point it need not equal the count
- * the caller asked for -- confirm the cap below is what is intended.
- */
- if (op == DB_SYNC_TRICKLE && ar_cnt > ar_max / (int)mp->nreg)
- ar_cnt = ar_max / (int)mp->nreg;
-
- /*
- * Flush the log. We have to ensure the log records reflecting the
- * changes on the database pages we're writing have already made it
- * to disk. We still have to check the log each time we write a page
- * (because pages we are about to write may be modified after we have
- * flushed the log), but in general this will at least avoid any I/O
- * on the log's part.
- */
- if (LOGGING_ON(dbenv) && (ret = dbenv->log_flush(dbenv, NULL)) != 0)
- goto err;
-
- /*
- * Walk the array, writing buffers. When we write a buffer, we NULL
- * out its hash bucket pointer so we don't process a slot more than
- * once.
- *
- * The walk wraps around: each time the end of the array is reached
- * the pass counter is bumped and we sleep for a second before
- * revisiting the slots that are still outstanding.
- */
- for (remaining = ar_cnt, i = pass = 0; remaining > 0; ++i) {
- if (i >= ar_cnt) {
- i = 0;
- ++pass;
- __os_sleep(dbenv, 1, 0);
- }
- if ((hp = bharray[i].track_hp) == NULL)
- continue;
-
- /* Lock the hash bucket and find the buffer. */
- mutexp = &hp->hash_mutex;
- MUTEX_LOCK(dbenv, mutexp);
- for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
- if (bhp->pgno == bharray[i].track_pgno &&
- bhp->mf_offset == bharray[i].track_off)
- break;
-
- /*
- * If we can't find the buffer we're done, somebody else had
- * to have written it.
- *
- * If the buffer isn't pinned or dirty, we're done, there's
- * no work needed.
- */
- if (bhp == NULL || (bhp->ref == 0 && !F_ISSET(bhp, BH_DIRTY))) {
- MUTEX_UNLOCK(dbenv, mutexp);
- --remaining;
- bharray[i].track_hp = NULL;
- continue;
- }
-
- /*
- * If the buffer is locked by another thread, ignore it, we'll
- * come back to it.
- *
- * If the buffer is pinned and it's only the first or second
- * time we have looked at it, ignore it, we'll come back to
- * it.
- *
- * In either case, skip the buffer if we're not required to
- * write it.
- */
- if (F_ISSET(bhp, BH_LOCKED) || (bhp->ref != 0 && pass < 2)) {
- MUTEX_UNLOCK(dbenv, mutexp);
- if (op != DB_SYNC_CACHE && op != DB_SYNC_FILE) {
- --remaining;
- bharray[i].track_hp = NULL;
- }
- continue;
- }
-
- /*
- * The buffer is either pinned or dirty.
- *
- * Set the sync wait-for count, used to count down outstanding
- * references to this buffer as they are returned to the cache.
- */
- bhp->ref_sync = bhp->ref;
-
- /* Pin the buffer into memory and lock it. */
- ++bhp->ref;
- F_SET(bhp, BH_LOCKED);
- MUTEX_LOCK(dbenv, &bhp->mutex);
-
- /*
- * Unlock the hash bucket and wait for the wait-for count to
- * go to 0. No new thread can acquire the buffer because we
- * have it locked.
- *
- * If a thread attempts to re-pin a page, the wait-for count
- * will never go to 0 (the thread spins on our buffer lock,
- * while we spin on the thread's ref count). Give up if we
- * don't get the buffer in 3 seconds, we can try again later.
- *
- * If, when the wait-for count goes to 0, the buffer is found
- * to be dirty, write it.
- */
- MUTEX_UNLOCK(dbenv, mutexp);
- for (wait_cnt = 1;
- bhp->ref_sync != 0 && wait_cnt < 4; ++wait_cnt)
- __os_sleep(dbenv, 1, 0);
- MUTEX_LOCK(dbenv, mutexp);
- hb_lock = 1;
-
- /*
- * If the ref_sync count has gone to 0, we're going to be done
- * with this buffer no matter what happens.
- */
- if (bhp->ref_sync == 0) {
- --remaining;
- bharray[i].track_hp = NULL;
- }
-
- /*
- * If the ref_sync count has gone to 0 and the buffer is still
- * dirty, we write it. We only try to write the buffer once.
- * Any process checkpointing or trickle-flushing the pool
- * must be able to write any underlying file -- if the write
- * fails, error out. It would be very strange if file sync
- * failed to write, but we don't care if it happens.
- */
- if (bhp->ref_sync == 0 && F_ISSET(bhp, BH_DIRTY)) {
- hb_lock = 0;
- MUTEX_UNLOCK(dbenv, mutexp);
-
- mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
- if ((ret = __memp_bhwrite(dbmp, hp, mfp, bhp, 1)) == 0)
- ++wrote;
- else if (op == DB_SYNC_CACHE || op == DB_SYNC_TRICKLE)
- __db_err(dbenv, "%s: unable to flush page: %lu",
- __memp_fns(dbmp, mfp), (u_long)bhp->pgno);
- else
- ret = 0;
- }
-
- /*
- * If ref_sync count never went to 0, the buffer was written
- * by another thread, or the write failed, we still have the
- * buffer locked.
- *
- * We may or may not currently hold the hash bucket mutex. If
- * the __memp_bhwrite -> __memp_pgwrite call was successful,
- * then __memp_pgwrite will have swapped the buffer lock for
- * the hash lock. All other call paths will leave us without
- * the hash bucket lock.
- *
- * The order of mutexes above was to acquire the buffer lock
- * while holding the hash bucket lock. Don't deadlock here,
- * release the buffer lock and then acquire the hash bucket
- * lock.
- */
- if (F_ISSET(bhp, BH_LOCKED)) {
- F_CLR(bhp, BH_LOCKED);
- MUTEX_UNLOCK(dbenv, &bhp->mutex);
-
- if (!hb_lock)
- MUTEX_LOCK(dbenv, mutexp);
- }
-
- /*
- * Reset the ref_sync count regardless of our success, we're
- * done with this buffer for now.
- */
- bhp->ref_sync = 0;
-
- /* Discard our reference and unlock the bucket. */
- --bhp->ref;
- MUTEX_UNLOCK(dbenv, mutexp);
-
- /* A write error that was not forgiven above ends the walk. */
- if (ret != 0)
- break;
- }
-
-done: /* If we've opened files to flush pages, close them. */
- if ((t_ret = __memp_close_flush_files(dbenv, dbmp)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * If doing a checkpoint or flushing a file for the application, we
- * have to force the pages to disk. We don't do this as we go along
- * because we want to give the OS as much time as possible to lazily
- * flush, and because we have to flush files that might not even have
- * had dirty buffers in the cache, so we have to walk the files list.
- */
- if (ret == 0 && (op == DB_SYNC_CACHE || op == DB_SYNC_FILE)) {
- if (dbmfp == NULL)
- ret = __memp_sync_files(dbenv, dbmp);
- else
- ret = __os_fsync(dbenv, dbmfp->fhp);
- }
-
- /* Common exit: release the tracking array and report the write count. */
-err: __os_free(dbenv, bharray);
- if (wrotep != NULL)
- *wrotep = wrote;
-
- return (ret);
-}
-
-/*
- * __memp_sync_files --
- *	Sync all the files in the environment, open or not.
- *
- *	Walks the pool's file list under the region lock.  Files with no
- *	pages written out (st_page_out == 0) and files flagged MP_DEADFILE
- *	or MP_TEMP are skipped.  For every other file, an existing handle in
- *	this process is fsync'd if there is one; otherwise a handle is
- *	created, opened, fsync'd and closed again.
- *
- *	Stops at the first failure, reports it with __db_err, and returns
- *	the error; returns 0 if every file was synced.
- */
-static int
-__memp_sync_files(dbenv, dbmp)
-	DB_ENV *dbenv;
-	DB_MPOOL *dbmp;
-{
-	DB_MPOOLFILE *handle;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	int close_ret, ret;
-
-	mp = dbmp->reginfo[0].primary;
-	ret = 0;
-
-	R_LOCK(dbenv, dbmp->reginfo);
-	for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
-	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-		if (mfp->stat.st_page_out == 0 ||
-		    F_ISSET(mfp, MP_DEADFILE | MP_TEMP))
-			continue;
-
-		/* Prefer a handle this process already has open. */
-		ret = 0;
-		MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
-		handle = TAILQ_FIRST(&dbmp->dbmfq);
-		while (handle != NULL && handle->mfp != mfp)
-			handle = TAILQ_NEXT(handle, q);
-		if (handle != NULL)
-			ret = __os_fsync(dbenv, handle->fhp);
-		MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-		if (ret != 0)
-			break;
-		if (handle != NULL)
-			continue;
-
-		/* No open handle: open one just long enough to sync. */
-		if ((ret = dbenv->memp_fcreate(dbenv, &handle, 0)) != 0)
-			break;
-		ret = __memp_fopen_int(
-		    handle, mfp, R_ADDR(dbmp->reginfo, mfp->path_off),
-		    0, 0, mfp->stat.st_pagesize);
-		if (ret == 0)
-			ret = __os_fsync(dbenv, handle->fhp);
-		close_ret = __memp_fclose_int(handle, 0);
-		if (ret == 0)
-			ret = close_ret;
-		if (ret != 0)
-			break;
-	}
-
-	/* ret is non-zero only if we broke out above, so mfp is valid. */
-	if (ret != 0)
-		__db_err(dbenv, "%s: cannot sync: %s",
-		    R_ADDR(dbmp->reginfo, mfp->path_off), db_strerror(ret));
-	R_UNLOCK(dbenv, dbmp->reginfo);
-
-	return (ret);
-}
-
-/*
- * __memp_close_flush_files --
- *	Close files opened only to flush buffers.
- *
- *	Returns 0 once no handle flagged MP_FLUSH remains, or the first
- *	error returned by __memp_fclose_int.
- */
-static int
-__memp_close_flush_files(dbenv, dbmp)
-	DB_ENV *dbenv;
-	DB_MPOOL *dbmp;
-{
-	DB_MPOOLFILE *victim;
-	int ret;
-
-	/*
-	 * Files opened by sync to flush buffers must be closed again, for
-	 * two reasons.  Extent files have to be closed so they may be
-	 * removed when empty.  Regular files have to be closed so we don't
-	 * run out of descriptors (for example, an application partitioning
-	 * its data into databases based on timestamps, so there's a
-	 * continually increasing set of files).
-	 *
-	 * __memp_bhwrite() marks the files it opens with the MP_FLUSH flag.
-	 * Each trip around the loop finds one such handle under the handle
-	 * list mutex, clears the flag, drops the mutex, and closes it; the
-	 * search then restarts from the head of the list.
-	 */
-	for (;;) {
-		MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
-		victim = TAILQ_FIRST(&dbmp->dbmfq);
-		while (victim != NULL && !F_ISSET(victim, MP_FLUSH))
-			victim = TAILQ_NEXT(victim, q);
-		if (victim != NULL) {
-			F_CLR(victim, MP_FLUSH);
-		}
-		MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
-
-		if (victim == NULL)
-			return (0);
-		if ((ret = __memp_fclose_int(victim, 0)) != 0)
-			return (ret);
-	}
-}
-
-/*
- * __bhcmp --
- *	qsort comparison function for BH_TRACK slots: order by file (shared
- *	memory pool offset) first and by page number second.
- */
-static int
-__bhcmp(p1, p2)
-	const void *p1, *p2;
-{
-	const BH_TRACK *lhs, *rhs;
-
-	lhs = p1;
-	rhs = p2;
-
-	/* Primary key: the file the page belongs to. */
-	if (lhs->track_off != rhs->track_off)
-		return (lhs->track_off < rhs->track_off ? -1 : 1);
-
-	/*
-	 * !!!
-	 * Secondary key: the page number.  Equal keys compare as 0, which
-	 * also copes with quicksort implementations that call the
-	 * comparison function with two identical pointers (e.g., WATCOM
-	 * C++ (Power++)).
-	 */
-	if (lhs->track_pgno != rhs->track_pgno)
-		return (lhs->track_pgno < rhs->track_pgno ? -1 : 1);
-
-	return (0);
-}
diff --git a/bdb/mp/mp_trickle.c b/bdb/mp/mp_trickle.c
deleted file mode 100644
index 71077ab60cc..00000000000
--- a/bdb/mp/mp_trickle.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_trickle.c,v 11.24 2002/08/06 06:13:53 bostic Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <stdlib.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-/*
- * __memp_trickle --
- *	Keep a specified percentage of the buffers clean.
- *
- *	dbenv:   environment; the mpool subsystem must be configured.
- *	pct:     desired percentage of clean buffers, 1 through 100.
- *	nwrotep: if non-NULL, receives the number of buffers written (it is
- *		 set to 0 before any other work is done).
- *
- *	Returns EINVAL if pct is out of range, 0 if the pool is already clean
- *	enough (or holds no pages), otherwise the result of __memp_sync_int.
- *
- * PUBLIC: int __memp_trickle __P((DB_ENV *, int, int *));
- */
-int
-__memp_trickle(dbenv, pct, nwrotep)
-	DB_ENV *dbenv;
-	int pct, *nwrotep;
-{
-	DB_MPOOL *dbmp;
-	MPOOL *c_mp, *mp;
-	u_int32_t clean, dirty, i, total, dtmp;
-	int ret, wrote;
-
-	PANIC_CHECK(dbenv);
-	ENV_REQUIRES_CONFIG(dbenv,
-	    dbenv->mp_handle, "memp_trickle", DB_INIT_MPOOL);
-
-	dbmp = dbenv->mp_handle;
-	mp = dbmp->reginfo[0].primary;
-
-	/*
-	 * The local counter stands in for the caller's when nwrotep is NULL.
-	 * It must start at 0: __memp_sync_int can return (on allocation
-	 * failure) without storing a count, and the count is added to the
-	 * region statistics below regardless of the return value.
-	 */
-	wrote = 0;
-	if (nwrotep != NULL)
-		*nwrotep = 0;
-
-	if (pct < 1 || pct > 100)
-		return (EINVAL);
-
-	/*
-	 * If there are sufficient clean buffers, no buffers or no dirty
-	 * buffers, we're done.
-	 *
-	 * XXX
-	 * Using hash_page_dirty is our only choice at the moment, but it's not
-	 * as correct as we might like in the presence of pools having more
-	 * than one page size, as a free 512B buffer isn't the same as a free
-	 * 8KB buffer.
-	 *
-	 * Loop through the caches counting total/dirty buffers.
-	 */
-	for (i = dirty = total = 0; i < mp->nreg; ++i) {
-		c_mp = dbmp->reginfo[i].primary;
-		total += c_mp->stat.st_pages;
-		__memp_stat_hash(&dbmp->reginfo[i], c_mp, &dtmp);
-		dirty += dtmp;
-	}
-
-	/*
-	 * The clean == total test also covers an empty pool (total == 0),
-	 * so the division below never sees a zero divisor.
-	 */
-	clean = total - dirty;
-	if (clean == total || (clean * 100) / total >= (u_long)pct)
-		return (0);
-
-	/* Ask for just enough writes to reach the requested percentage. */
-	if (nwrotep == NULL)
-		nwrotep = &wrote;
-	ret = __memp_sync_int(dbenv, NULL,
-	    ((total * pct) / 100) - clean, DB_SYNC_TRICKLE, nwrotep);
-
-	mp->stat.st_page_trickle += *nwrotep;
-
-	return (ret);
-}