Diffstat (limited to 'bdb/mp/mp_alloc.c')
-rw-r--r-- | bdb/mp/mp_alloc.c | 442
1 files changed, 0 insertions, 442 deletions
diff --git a/bdb/mp/mp_alloc.c b/bdb/mp/mp_alloc.c
deleted file mode 100644
index 96dd612d7ba..00000000000
--- a/bdb/mp/mp_alloc.c
+++ /dev/null
@@ -1,442 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- *	Sleepycat Software.  All rights reserved.
- */
-#include "db_config.h"
-
-#ifndef lint
-static const char revid[] = "$Id: mp_alloc.c,v 11.31 2002/08/14 17:21:37 ubell Exp $";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "dbinc/db_shash.h"
-#include "dbinc/mp.h"
-
-typedef struct {
-	DB_MPOOL_HASH *bucket;
-	u_int32_t priority;
-} HS;
-
-static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
-static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
-
-/*
- * __memp_alloc --
- *	Allocate some space from a cache region.
- *
- * PUBLIC: int __memp_alloc __P((DB_MPOOL *,
- * PUBLIC:     REGINFO *, MPOOLFILE *, size_t, roff_t *, void *));
- */
-int
-__memp_alloc(dbmp, memreg, mfp, len, offsetp, retp)
-	DB_MPOOL *dbmp;
-	REGINFO *memreg;
-	MPOOLFILE *mfp;
-	size_t len;
-	roff_t *offsetp;
-	void *retp;
-{
-	BH *bhp;
-	DB_ENV *dbenv;
-	DB_MPOOL_HASH *dbht, *hp, *hp_end, *hp_tmp;
-	DB_MUTEX *mutexp;
-	MPOOL *c_mp;
-	MPOOLFILE *bh_mfp;
-	size_t freed_space;
-	u_int32_t buckets, buffers, high_priority, max_na, priority;
-	int aggressive, ret;
-	void *p;
-
-	dbenv = dbmp->dbenv;
-	c_mp = memreg->primary;
-	dbht = R_ADDR(memreg, c_mp->htab);
-	hp_end = &dbht[c_mp->htab_buckets];
-
-	buckets = buffers = 0;
-	aggressive = 0;
-
-	c_mp->stat.st_alloc++;
-
-	/*
-	 * Get aggressive if we've tried to flush the number of pages as are
-	 * in the system without finding space.
-	 */
-	max_na = 5 * c_mp->htab_buckets;
-
-	/*
-	 * If we're allocating a buffer, and the one we're discarding is the
-	 * same size, we don't want to waste the time to re-integrate it into
-	 * the shared memory free list.  If the DB_MPOOLFILE argument isn't
-	 * NULL, we'll compare the underlying page sizes of the two buffers
-	 * before free-ing and re-allocating buffers.
-	 */
-	if (mfp != NULL)
-		len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
-
-	R_LOCK(dbenv, memreg);
-
-	/*
-	 * On every buffer allocation we update the buffer generation number
-	 * and check for wraparound.
-	 */
-	if (++c_mp->lru_count == UINT32_T_MAX)
-		__memp_reset_lru(dbenv, memreg, c_mp);
-
-	/*
-	 * Anything newer than 1/10th of the buffer pool is ignored during
-	 * allocation (unless allocation starts failing).
-	 */
-	DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
-	high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
-
-	/*
-	 * First we try to allocate from free memory.  If that fails, scan the
-	 * buffer pool to find buffers with low priorities.  We consider small
-	 * sets of hash buckets each time to limit the amount of work needing
-	 * to be done.  This approximates LRU, but not very well.  We either
-	 * find a buffer of the same size to use, or we will free 3 times what
-	 * we need in the hopes it will coalesce into a contiguous chunk of the
-	 * right size.  In the latter case we branch back here and try again.
-	 */
-alloc:	if ((ret = __db_shalloc(memreg->addr, len, MUTEX_ALIGN, &p)) == 0) {
-		if (mfp != NULL)
-			c_mp->stat.st_pages++;
-		R_UNLOCK(dbenv, memreg);
-
-found:		if (offsetp != NULL)
-			*offsetp = R_OFFSET(memreg, p);
-		*(void **)retp = p;
-
-		/*
-		 * Update the search statistics.
-		 *
-		 * We're not holding the region locked here, these statistics
-		 * can't be trusted.
-		 */
-		if (buckets != 0) {
-			if (buckets > c_mp->stat.st_alloc_max_buckets)
-				c_mp->stat.st_alloc_max_buckets = buckets;
-			c_mp->stat.st_alloc_buckets += buckets;
-		}
-		if (buffers != 0) {
-			if (buffers > c_mp->stat.st_alloc_max_pages)
-				c_mp->stat.st_alloc_max_pages = buffers;
-			c_mp->stat.st_alloc_pages += buffers;
-		}
-		return (0);
-	}
-
-	/*
-	 * We re-attempt the allocation every time we've freed 3 times what
-	 * we need.  Reset our free-space counter.
-	 */
-	freed_space = 0;
-
-	/*
-	 * Walk the hash buckets and find the next two with potentially useful
-	 * buffers.  Free the buffer with the lowest priority from the buckets'
-	 * chains.
-	 */
-	for (hp_tmp = NULL;;) {
-		/* Check for wrap around. */
-		hp = &dbht[c_mp->last_checked++];
-		if (hp >= hp_end) {
-			c_mp->last_checked = 0;
-
-			/*
-			 * If we've gone through all of the hash buckets, try
-			 * an allocation.  If the cache is small, the old page
-			 * size is small, and the new page size is large, we
-			 * might have freed enough memory (but not 3 times the
-			 * memory).
-			 */
-			goto alloc;
-		}
-
-		/*
-		 * Skip empty buckets.
-		 *
-		 * We can check for empty buckets before locking as we
-		 * only care if the pointer is zero or non-zero.
-		 */
-		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
-			continue;
-
-		/*
-		 * The failure mode is when there are too many buffers we can't
-		 * write or there's not enough memory in the system.  We don't
-		 * have a metric for deciding if allocation has no possible way
-		 * to succeed, so we don't ever fail, we assume memory will be
-		 * available if we wait long enough.
-		 *
-		 * Get aggressive if we've tried to flush 5 times the number of
-		 * hash buckets as are in the system -- it's possible we have
-		 * been repeatedly trying to flush the same buffers, although
-		 * it's unlikely.  Aggressive means:
-		 *
-		 * a: set a flag to attempt to flush high priority buffers as
-		 *    well as other buffers.
-		 * b: sync the mpool to force out queue extent pages.  While we
-		 *    might not have enough space for what we want and flushing
-		 *    is expensive, why not?
-		 * c: sleep for a second -- hopefully someone else will run and
-		 *    free up some memory.  Try to allocate memory too, in case
-		 *    the other thread returns its memory to the region.
-		 * d: look at a buffer in every hash bucket rather than choose
-		 *    the more preferable of two.
-		 *
-		 * !!!
-		 * This test ignores pathological cases like no buffers in the
-		 * system -- that shouldn't be possible.
-		 */
-		if ((++buckets % max_na) == 0) {
-			aggressive = 1;
-
-			R_UNLOCK(dbenv, memreg);
-
-			(void)__memp_sync_int(
-			    dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
-
-			(void)__os_sleep(dbenv, 1, 0);
-
-			R_LOCK(dbenv, memreg);
-			goto alloc;
-		}
-
-		if (!aggressive) {
-			/* Skip high priority buckets. */
-			if (hp->hash_priority > high_priority)
-				continue;
-
-			/*
-			 * Find two buckets and select the one with the lowest
-			 * priority.  Performance testing shows that looking
-			 * at two improves the LRUness and looking at more only
-			 * does a little better.
-			 */
-			if (hp_tmp == NULL) {
-				hp_tmp = hp;
-				continue;
-			}
-			if (hp->hash_priority > hp_tmp->hash_priority)
-				hp = hp_tmp;
-			hp_tmp = NULL;
-		}
-
-		/* Remember the priority of the buffer we're looking for. */
-		priority = hp->hash_priority;
-
-		/* Unlock the region and lock the hash bucket. */
-		R_UNLOCK(dbenv, memreg);
-		mutexp = &hp->hash_mutex;
-		MUTEX_LOCK(dbenv, mutexp);
-
-#ifdef DIAGNOSTIC
-		__memp_check_order(hp);
-#endif
-		/*
-		 * The lowest priority page is first in the bucket, as they are
-		 * maintained in sorted order.
-		 *
-		 * The buffer may have been freed or its priority changed while
-		 * we switched from the region lock to the hash lock.  If so,
-		 * we have to restart.  We will still take the first buffer on
-		 * the bucket's list, though, if it has a low enough priority.
-		 */
-		if ((bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) == NULL ||
-		    bhp->ref != 0 || bhp->priority > priority)
-			goto next_hb;
-
-		buffers++;
-
-		/* Find the associated MPOOLFILE. */
-		bh_mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
-
-		/* If the page is dirty, pin it and write it. */
-		ret = 0;
-		if (F_ISSET(bhp, BH_DIRTY)) {
-			++bhp->ref;
-			ret = __memp_bhwrite(dbmp, hp, bh_mfp, bhp, 0);
-			--bhp->ref;
-			if (ret == 0)
-				++c_mp->stat.st_rw_evict;
-		} else
-			++c_mp->stat.st_ro_evict;
-
-		/*
-		 * If a write fails for any reason, we can't proceed.
-		 *
-		 * We released the hash bucket lock while doing I/O, so another
-		 * thread may have acquired this buffer and incremented the ref
-		 * count after we wrote it, in which case we can't have it.
-		 *
-		 * If there's a write error, avoid selecting this buffer again
-		 * by making it the bucket's least-desirable buffer.
-		 */
-		if (ret != 0 || bhp->ref != 0) {
-			if (ret != 0 && aggressive)
-				__memp_bad_buffer(hp);
-			goto next_hb;
-		}
-
-		/*
-		 * Check to see if the buffer is the size we're looking for.
-		 * If so, we can simply reuse it.  Else, free the buffer and
-		 * its space and keep looking.
-		 */
-		if (mfp != NULL &&
-		    mfp->stat.st_pagesize == bh_mfp->stat.st_pagesize) {
-			__memp_bhfree(dbmp, hp, bhp, 0);
-
-			p = bhp;
-			goto found;
-		}
-
-		freed_space += __db_shsizeof(bhp);
-		__memp_bhfree(dbmp, hp, bhp, 1);
-
-		/*
-		 * Unlock this hash bucket and re-acquire the region lock.  If
-		 * we're reaching here as a result of calling memp_bhfree, the
-		 * hash bucket lock has already been discarded.
-		 */
-		if (0) {
-next_hb:		MUTEX_UNLOCK(dbenv, mutexp);
-		}
-		R_LOCK(dbenv, memreg);
-
-		/*
-		 * Retry the allocation as soon as we've freed up sufficient
-		 * space.  We're likely to have to coalesce of memory to
-		 * satisfy the request, don't try until it's likely (possible?)
-		 * we'll succeed.
-		 */
-		if (freed_space >= 3 * len)
-			goto alloc;
-	}
-	/* NOTREACHED */
-}
-
-/*
- * __memp_bad_buffer --
- *	Make the first buffer in a hash bucket the least desirable buffer.
- */
-static void
-__memp_bad_buffer(hp)
-	DB_MPOOL_HASH *hp;
-{
-	BH *bhp, *t_bhp;
-	u_int32_t priority;
-
-	/* Remove the first buffer from the bucket. */
-	bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
-	SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
-
-	/*
-	 * Find the highest priority buffer in the bucket.  Buffers are
-	 * sorted by priority, so it's the last one in the bucket.
-	 *
-	 * XXX
-	 * Should use SH_TAILQ_LAST, but I think that macro is broken.
-	 */
-	priority = bhp->priority;
-	for (t_bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
-	    t_bhp != NULL; t_bhp = SH_TAILQ_NEXT(t_bhp, hq, __bh))
-		priority = t_bhp->priority;
-
-	/*
-	 * Set our buffer's priority to be just as bad, and append it to
-	 * the bucket.
-	 */
-	bhp->priority = priority;
-	SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
-
-	/* Reset the hash bucket's priority. */
-	hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
-}
-
-/*
- * __memp_reset_lru --
- *	Reset the cache LRU counter.
- */
-static void
-__memp_reset_lru(dbenv, memreg, c_mp)
-	DB_ENV *dbenv;
-	REGINFO *memreg;
-	MPOOL *c_mp;
-{
-	BH *bhp;
-	DB_MPOOL_HASH *hp;
-	int bucket;
-
-	/*
-	 * Update the counter so all future allocations will start at the
-	 * bottom.
-	 */
-	c_mp->lru_count -= MPOOL_BASE_DECREMENT;
-
-	/* Release the region lock. */
-	R_UNLOCK(dbenv, memreg);
-
-	/* Adjust the priority of every buffer in the system. */
-	for (hp = R_ADDR(memreg, c_mp->htab),
-	    bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
-		/*
-		 * Skip empty buckets.
-		 *
-		 * We can check for empty buckets before locking as we
-		 * only care if the pointer is zero or non-zero.
-		 */
-		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
-			continue;
-
-		MUTEX_LOCK(dbenv, &hp->hash_mutex);
-		for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
-			if (bhp->priority != UINT32_T_MAX &&
-			    bhp->priority > MPOOL_BASE_DECREMENT)
-				bhp->priority -= MPOOL_BASE_DECREMENT;
-		MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-	}
-
-	/* Reacquire the region lock. */
-	R_LOCK(dbenv, memreg);
-}
-
-#ifdef DIAGNOSTIC
-/*
- * __memp_check_order --
- *	Verify the priority ordering of a hash bucket chain.
- *
- * PUBLIC: #ifdef DIAGNOSTIC
- * PUBLIC: void __memp_check_order __P((DB_MPOOL_HASH *));
- * PUBLIC: #endif
- */
-void
-__memp_check_order(hp)
-	DB_MPOOL_HASH *hp;
-{
-	BH *bhp;
-	u_int32_t priority;
-
-	/*
-	 * Assumes the hash bucket is locked.
-	 */
-	if ((bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) == NULL)
-		return;
-
-	DB_ASSERT(bhp->priority == hp->hash_priority);
-
-	for (priority = bhp->priority;
-	    (bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) != NULL;
-	    priority = bhp->priority)
-		DB_ASSERT(priority <= bhp->priority);
-}
-#endif
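For orientation only, below is a minimal C sketch of the eviction loop the deleted __memp_alloc implements: try the free list, walk buckets two at a time and keep the lower-priority one, reuse a same-size victim directly, otherwise free victims until roughly three times the requested space is released and retry. The types (struct buf, struct bucket) and the hooks cache_try_alloc, cache_write_buf, and cache_free_buf are hypothetical stand-ins, not Berkeley DB APIs; region and hash-bucket locking, statistics, and the "aggressive" fallback from the real code are omitted.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical, simplified stand-ins for BH / DB_MPOOL_HASH; only the
 * fields the eviction walk needs are kept. */
struct buf {
	struct buf *next;	/* bucket chain, kept sorted by priority */
	uint32_t priority;	/* lower value == better eviction candidate */
	size_t size;		/* space the buffer occupies */
	int dirty;		/* must be written before it can be reused */
	int ref;		/* pinned buffers cannot be evicted */
};

struct bucket {
	struct buf *head;	/* lowest-priority buffer first */
	uint32_t priority;	/* priority of the first buffer */
};

/* Hypothetical hooks standing in for __db_shalloc, __memp_bhwrite and
 * __memp_bhfree; a real caller would supply cache-specific versions. */
extern void *cache_try_alloc(size_t len);
extern int cache_write_buf(struct buf *bhp);
extern void cache_free_buf(struct buf *bhp);

/*
 * alloc_sketch --
 *	Illustrative only: like the original, it never fails; it assumes
 *	memory or reusable buffers eventually become available.
 */
void *
alloc_sketch(struct bucket *htab, uint32_t nbuckets, size_t len)
{
	struct bucket *hp, *hp_tmp;
	struct buf *bhp;
	size_t freed_space;
	uint32_t i;
	void *p;

retry:	if ((p = cache_try_alloc(len)) != NULL)
		return (p);

	freed_space = 0;
	for (hp_tmp = NULL, i = 0;; i = (i + 1) % nbuckets) {
		/* After a full pass, retry the allocation (cf. "goto alloc"). */
		if (i == 0 && freed_space != 0)
			goto retry;

		hp = &htab[i];
		if (hp->head == NULL)		/* skip empty buckets */
			continue;

		/* Look at two buckets and keep the lower-priority one. */
		if (hp_tmp == NULL) {
			hp_tmp = hp;
			continue;
		}
		if (hp->priority > hp_tmp->priority)
			hp = hp_tmp;
		hp_tmp = NULL;

		/* Skip pinned buffers; write dirty ones before eviction. */
		bhp = hp->head;
		if (bhp->ref != 0)
			continue;
		if (bhp->dirty && cache_write_buf(bhp) != 0)
			continue;

		/* Unlink the victim (bucket priority bookkeeping omitted). */
		hp->head = bhp->next;

		/* A same-size buffer can be reused directly. */
		if (bhp->size == len)
			return (bhp);

		/* Otherwise free it; retry once ~3x the need has been freed. */
		freed_space += bhp->size;
		cache_free_buf(bhp);
		if (freed_space >= 3 * len)
			goto retry;
	}
	/* NOTREACHED */
}

The real function additionally counts buckets and buffers searched for the st_alloc_* statistics, turns "aggressive" after scanning five times the number of hash buckets (syncing the pool and sleeping before retrying), and interleaves the region and per-bucket mutexes as shown in the diff above.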