author     unknown <tim@threads.polyesthetic.msg>    2001-03-04 19:42:05 -0500
committer  unknown <tim@threads.polyesthetic.msg>    2001-03-04 19:42:05 -0500
commit     ec6ae091617bdfdca9e65e8d3e65b950d234f676 (patch)
tree       9dd732e08dba156ee3d7635caedc0dc3107ecac6 /bdb/lock
parent     87d70fb598105b64b538ff6b81eef9da626255b1 (diff)
download   mariadb-git-ec6ae091617bdfdca9e65e8d3e65b950d234f676.tar.gz
Import changeset
Diffstat (limited to 'bdb/lock')
-rw-r--r--  bdb/lock/Design            293
-rw-r--r--  bdb/lock/lock.c           1439
-rw-r--r--  bdb/lock/lock_conflict.c    34
-rw-r--r--  bdb/lock/lock_deadlock.c   637
-rw-r--r--  bdb/lock/lock_method.c     148
-rw-r--r--  bdb/lock/lock_region.c     430
-rw-r--r--  bdb/lock/lock_stat.c       308
-rw-r--r--  bdb/lock/lock_util.c       138
8 files changed, 3427 insertions, 0 deletions
diff --git a/bdb/lock/Design b/bdb/lock/Design new file mode 100644 index 00000000000..ac8f0b02fbf --- /dev/null +++ b/bdb/lock/Design @@ -0,0 +1,293 @@ +# $Id: Design,v 11.3 2000/02/19 20:58:03 bostic Exp $ + +Synchronization in the Locking Subsystem + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +1. Data structures +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +The lock manager maintains 3 different structures: + +Objects (__db_lockobj): + Describes an object that is locked. When used with DB, this consists + of a __db_ilock (a file identifier and a page number). + +Lockers (__db_locker): + Identifies a specific locker ID and maintains the head of a list of + locks held by a locker (for using during transaction commit/abort). + +Locks (__db_lock): + Describes a particular object lock held on behalf of a particular + locker id. + +Objects and Lockers reference Locks. + +These structures are organized via two synchronized hash tables. Each +hash table consists of two physical arrays: the array of actual hash +buckets and an array of mutexes so we can lock individual buckets, rather +than the whole table. + +One hash table contains Objects and the other hash table contains Lockers. +Objects contain two lists of locks, waiters and holders: holders currently +hold a lock on the Object, waiters are lock waiting to be granted. +Lockers are a single linked list that connects the Locks held on behalf +of the specific locker ID. + +In the diagram below: + +Locker ID #1 holds a lock on Object #1 (L1) and Object #2 (L5), and is +waiting on a lock on Object #1 (L3). + +Locker ID #2 holds a lock on Object #1 (L2) and is waiting on a lock for +Object #2 (L7). + +Locker ID #3 is waiting for a lock on Object #2 (L6). + + OBJECT ----------------------- + HASH | | + ----|------------- | + ________ _______ | | ________ | | + | |-->| O1 |--|---|-->| O2 | | | + |_______| |_____| | | |______| V | + | | W H--->L1->L2 W H--->L5 | holders + |_______| | | | | V + | | ------->L3 \ ------->L6------>L7 waiters + |_______| / \ \ + . . / \ \ + . . | \ \ + . . | \ ----------- + |_______| | -------------- | + | | ____|____ ___|_____ _|______ + |_______| | | | | | | + | | | LID1 | | LID2 | | LID3 | + |_______| |_______| |_______| |______| + ^ ^ ^ + | | | + ___|________________________|________|___ + LOCKER | | | | | | | | | + HASH | | | | | | | | | + | | | | | | | | | + |____|____|____|____|____|____|____|____| + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +2. Synchronization +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +There are four types of mutexes in the subsystem. + +Object mutexes; + These map one-to-one to each bucket in the Object hash table. + Holding a mutex on an Object bucket secures all the Objects in + that bucket as well as the Lock structures linked from those + Objects. All fields in the Locks EXCEPT the Locker links (the + links that attach Locks by Locker ID) are protected by these + mutexes. + +Locker mutexes: + These map one-to-one to each bucket in the Locker hash table. + Holding a mutex on a Locker bucket secures the Locker structures + and the Locker links in the Locks. + +Memory mutex: + This mutex allows calls to allocate/free memory, i.e. calls to + __db_shalloc and __db_shalloc_free, as well as manipulation of + the Object, Locker and Lock free lists. + +Region mutex: + This mutex is currently only used to protect the locker ids. + It may also be needed later to provide exclusive access to + the region for deadlock detection. 
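As a rough illustration of the structures and per-bucket mutexes described above, here is a simplified C sketch. Every name in it (ex_lock, ex_object, ex_locker, ex_object_table) is invented for the illustration; the real definitions are the __db_lock, __db_lockobj and __db_locker structures in lock.h, which live in shared memory and use the library's own mutex primitives rather than pthreads.

    #include <stdint.h>
    #include <pthread.h>

    /* One granted or waiting lock (cf. __db_lock). */
    struct ex_lock {
        uint32_t          holder;        /* locker id that owns/requests it */
        int               mode;          /* lock mode */
        struct ex_lock   *obj_next;      /* next on the object's holders/waiters list */
        struct ex_lock   *locker_next;   /* next on the locker's list */
    };

    /* A locked object: holders currently hold it, waiters wait (cf. __db_lockobj). */
    struct ex_object {
        struct ex_lock   *holders;
        struct ex_lock   *waiters;
        struct ex_object *bucket_next;   /* hash-bucket chain */
    };

    /* A locker id and the head of its list of locks (cf. __db_locker). */
    struct ex_locker {
        uint32_t          id;
        struct ex_lock   *heldby;
        struct ex_locker *bucket_next;   /* hash-bucket chain */
    };

    /*
     * Each hash table is two parallel arrays: the buckets themselves and
     * one mutex per bucket, so a single bucket can be locked rather than
     * the whole table.  The Locker table is laid out the same way; a
     * pthread mutex stands in here for the shared-memory region mutexes.
     */
    struct ex_object_table {
        struct ex_object **bucket;       /* nbuckets chains of objects */
        pthread_mutex_t   *bucket_mutex; /* one mutex per bucket */
        uint32_t           nbuckets;
    };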
+ +Creating or removing a Lock requires locking both the Object lock and the +Locker lock (and eventually the shalloc lock to return the item to the +free list). + +The locking hierarchy is as follows: + + The Region mutex may never be acquired after any other mutex. + + The Object mutex may be acquired after the Region mutex. + + The Locker mutex may be acquired after the Region and Object + mutexes. + + The Memory mutex may be acquired after any mutex. + +So, if both and Object mutex and a Locker mutex are going to be acquired, +the Object mutex must be acquired first. + +The Memory mutex may be acquired after any other mutex, but no other mutexes +can be acquired once the Memory mutex is held. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +3. The algorithms: +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +The locking subsystem supports four basic operations: + Get a Lock (lock_get) + + Release a Lock (lock_put) + + Release all the Locks on a specific Object (lock_vec) + + Release all the Locks for a specific Locker (lock_vec) + +Get a lock: + Acquire Object bucket mutex. + Acquire Locker bucket mutex. + + Acquire Memory mutex. + If the Object does not exist + Take an Object off the freelist. + If the Locker doesn't exist + Take a Locker off the freelist. + Take a Lock off the free list. + Release Memory mutex. + + Add Lock to the Object list. + Add Lock to the Locker list. + Release Locker bucket mutex + + If the lock cannot be granted + Release Object bucket mutex + Acquire lock mutex (blocks) + + Acquire Object bucket mutex + If lock acquisition did not succeed (e.g, deadlock) + Acquire Locker bucket mutex + If locker should be destroyed + Remove locker from hash table + Acquire Memory mutex + Return locker to free list + Release Memory mutex + Release Locker bucket mutex + + If object should be released + Acquire Memory mutex + Return object to free list + Release Memory mutex + + Release Object bucket mutex + +Release a lock: + Acquire Object bucket mutex. + (Requires that we be able to find the Object hash bucket + without looking inside the Lock itself.) + + If releasing a single lock and the user provided generation number + doesn't match the Lock's generation number, the Lock has been reused + and we return failure. + + Enter lock_put_internal: + if the Lock is still on the Object's lists: + Increment Lock's generation number. + Remove Lock from the Object's list (NULL link fields). + Promote locks for the Object. + + Enter locker_list_removal + Acquire Locker bucket mutex. + If Locker doesn't exist: + Release Locker bucket mutex + Release Object bucket mutex + Return error. + Else if Locker marked as deleted: + dont_release = TRUE + Else + Remove Lock from Locker list. + If Locker has no more locks + Remove Locker from table. + Acquire Memory mutex. + Return Locker to free list + Release Memory mutex + Release Locker bucket mutex. + Exit locker_list_removal + + If (!dont_release) + Acquire Memory mutex + Return Lock to free list + Release Memory mutex + + Exit lock_put_internal + + Release Object bucket mutex + +Release all the Locks on a specific Object (lock_vec, DB_PUT_ALL_OBJ): + + Acquire Object bucket mutex. + + For each lock on the waiter list: + lock_put_internal + For each lock on the holder list: + lock_put_internal + + Release Object bucket mutex. + +Release all the Locks for a specific Locker (lock_vec, DB_PUT_ALL): + + Acquire Locker bucket mutex. + Mark Locker deleted. + Release Locker mutex. 
+ + For each lock on the Locker's list: + Remove from locker's list + (The lock could get put back on the free list in + lock_put and then could get reallocated and the + act of setting its locker links could clobber us.) + Perform "Release a Lock" above: skip locker_list_removal. + + Acquire Locker bucket mutex. + Remove Locker + Release Locker mutex. + + Acquire Memory mutex + Return Locker to free list + Release Memory mutex + +Deadlock detection (lock_detect): + + For each bucket in Object table + Acquire the Object bucket mutex. + create waitsfor + + For each bucket in Object table + Release the Object mutex. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +FAQ: +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +Q: Why do you need generation numbers? +A: If a lock has been released due to a transaction abort (potentially in a + different process), and then lock is released by a thread of control + unaware of the abort, the lock might have potentially been re-allocated + to a different object. The generation numbers detect this problem. + + Note, we assume that reads/writes of lock generation numbers are atomic, + if they are not, it is theoretically possible that a re-allocated lock + could be mistaken for another lock. + +Q: Why is is safe to walk the Locker list without holding any mutexes at + all? +A: Locks are created with both the Object and Locker bucket mutexes held. + Once created, they removed in two ways: + + a) when a specific Lock is released, in which case, the Object and + Locker bucket mutexes are again held, and + + b) when all Locks for a specific Locker Id is released. + + In case b), the Locker bucket mutex is held while the Locker chain is + marked as "destroyed", which blocks any further access to the Locker + chain. Then, each individual Object bucket mutex is acquired when each + individual Lock is removed. + +Q: What are the implications of doing fine grain locking? + +A: Since we no longer globally lock the entire region, lock_vec will no + longer be atomic. We still execute the items in a lock_vec in order, + so things like lock-coupling still work, but you can't make any + guarantees about atomicity. + +Q: How do I configure for FINE_GRAIN locking? + +A: We currently do not support any automatic configuration for FINE_GRAIN + locking. When we do, will need to document that atomicity discussion + listed above (it is bug-report #553). diff --git a/bdb/lock/lock.c b/bdb/lock/lock.c new file mode 100644 index 00000000000..8d246f7ded3 --- /dev/null +++ b/bdb/lock/lock.c @@ -0,0 +1,1439 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. 
+ */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: lock.c,v 11.40 2000/12/19 23:18:58 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_shash.h" +#include "lock.h" +#include "log.h" +#include "db_am.h" +#include "txn.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +static int __lock_checklocker __P((DB_LOCKTAB *, + struct __db_lock *, u_int32_t, u_int32_t, int *)); +static int __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, + u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); +static int __lock_is_parent __P((DB_LOCKTAB *, u_int32_t, DB_LOCKER *)); +static int __lock_put_internal __P((DB_LOCKTAB *, + struct __db_lock *, u_int32_t, u_int32_t)); +static int __lock_put_nolock __P((DB_ENV *, DB_LOCK *, int *, int)); +static void __lock_remove_waiter __P((DB_ENV *, + DB_LOCKOBJ *, struct __db_lock *, db_status_t)); + +static const char __db_lock_err[] = "Lock table is out of available %s"; +static const char __db_lock_invalid[] = "%s: Lock is no longer valid"; +static const char __db_locker_invalid[] = "Locker is not valid"; + +/* + * lock_id -- + * Generate a unique locker id. + */ +int +lock_id(dbenv, idp) + DB_ENV *dbenv; + u_int32_t *idp; +{ + DB_LOCKTAB *lt; + DB_LOCKREGION *region; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_lock_id(dbenv, idp)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + /* + * Note that we are letting locker IDs wrap. + * + * This is potentially dangerous in that it's conceivable that you + * could be allocating a new locker id and still have someone using + * it. However, the alternatives are that we keep a bitmap of + * locker ids or we forbid wrapping. Both are probably bad. The + * bitmap of locker ids will take up 64 MB of space. Forbidding + * wrapping means that we'll run out of locker IDs after 2 billion. + * In order for the wrap bug to fire, we'd need to have something + * that stayed open while 2 billion locker ids were used up. Since + * we cache cursors it means that something would have to stay open + * sufficiently long that we open and close a lot of files and a + * lot of cursors within them. Betting that this won't happen seems + * to the lesser of the evils. + */ + LOCKREGION(dbenv, lt); + if (region->id >= DB_LOCK_MAXID) + region->id = 0; + *idp = ++region->id; + UNLOCKREGION(dbenv, lt); + + return (0); +} + +/* + * Vector lock routine. This function takes a set of operations + * and performs them all at once. In addition, lock_vec provides + * functionality for lock inheritance, releasing all locks for a + * given locker (used during transaction commit/abort), releasing + * all locks on a given object, and generating debugging information. 
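    /*
     * Minimal usage sketch (not part of this tree): allocate a locker id,
     * then acquire a write lock on one object and release everything the
     * locker holds in a single lock_vec() call.  The environment is
     * assumed to be already opened with DB_INIT_LOCK, and the object key
     * is invented for the example.
     */
    #include <string.h>
    #include <db.h>

    static int
    ex_lock_vec(DB_ENV *dbenv)
    {
        DB_LOCKREQ req[2], *failed;
        DBT obj;
        u_int32_t locker;
        int ret;

        if ((ret = lock_id(dbenv, &locker)) != 0)
            return (ret);

        memset(&obj, 0, sizeof(obj));
        obj.data = "example-page";              /* illustrative lock object */
        obj.size = sizeof("example-page") - 1;

        memset(req, 0, sizeof(req));
        req[0].op = DB_LOCK_GET;                /* acquire a write lock ... */
        req[0].mode = DB_LOCK_WRITE;
        req[0].obj = &obj;
        req[1].op = DB_LOCK_PUT_ALL;            /* ... then drop all locks held */

        return (lock_vec(dbenv, locker, 0, req, 2, &failed));
    }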
+ */ +int +lock_vec(dbenv, locker, flags, list, nlist, elistp) + DB_ENV *dbenv; + u_int32_t locker, flags; + int nlist; + DB_LOCKREQ *list, **elistp; +{ + struct __db_lock *lp, *next_lock; + DB_LOCKER *sh_locker, *sh_parent; + DB_LOCKOBJ *obj, *sh_obj; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + u_int32_t lndx, ndx; + int did_abort, i, ret, run_dd; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_lock_vec(dbenv, locker, + flags, list, nlist, elistp)); +#endif + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + + /* Validate arguments. */ + if ((ret = __db_fchk(dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0) + return (ret); + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + run_dd = 0; + LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); + for (i = 0, ret = 0; i < nlist && ret == 0; i++) + switch (list[i].op) { + case DB_LOCK_GET: + ret = __lock_get_internal(dbenv->lk_handle, + locker, flags, + list[i].obj, list[i].mode, &list[i].lock); + break; + case DB_LOCK_INHERIT: + + /* + * Get the committing locker and mark it as deleted. + * This allows us to traverse the locker links without + * worrying that someone else is deleting locks out + * from under us. However, if the locker doesn't + * exist, that just means that the child holds no + * locks, so inheritance is easy! + */ + LOCKER_LOCK(lt, region, locker, ndx); + if ((ret = __lock_getlocker(lt, + locker, ndx, 0, &sh_locker)) != 0 || + sh_locker == NULL || + F_ISSET(sh_locker, DB_LOCKER_DELETED)) { + if (ret == 0 && sh_locker != NULL) + ret = EACCES; + __db_err(dbenv, __db_locker_invalid); + break; + } + + /* Make sure we are a child transaction. */ + if (sh_locker->parent_locker == INVALID_ROFF) { + __db_err(dbenv, "Not a child transaction"); + ret = EINVAL; + break; + } + sh_parent = (DB_LOCKER *) + R_ADDR(<->reginfo, sh_locker->parent_locker); + F_SET(sh_locker, DB_LOCKER_DELETED); + + /* + * Now, lock the parent locker; move locks from + * the committing list to the parent's list. + */ + LOCKER_LOCK(lt, region, locker, ndx); + if (F_ISSET(sh_parent, DB_LOCKER_DELETED)) { + if (ret == 0) { + __db_err(dbenv, + "Parent locker is not valid"); + ret = EACCES; + } + break; + } + + for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); + lp != NULL; + lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) { + SH_LIST_REMOVE(lp, locker_links, __db_lock); + SH_LIST_INSERT_HEAD(&sh_parent->heldby, lp, + locker_links, __db_lock); + lp->holder = sh_parent->id; + + /* Get the object associated with this lock. */ + obj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); + + (void)__lock_promote(lt, obj, + LF_ISSET(DB_LOCK_NOWAITERS)); + } + + /* Now free the original locker. */ + ret = __lock_checklocker(lt, + NULL, locker, DB_LOCK_IGNOREDEL, NULL); + break; + case DB_LOCK_PUT: + ret = + __lock_put_nolock(dbenv, &list[i].lock, &run_dd, 0); + break; + case DB_LOCK_PUT_ALL: + /* + * Get the locker and mark it as deleted. This + * allows us to traverse the locker links without + * worrying that someone else is deleting locks out + * from under us. Since the locker may hold no + * locks (i.e., you could call abort before you've + * done any work), it's perfectly reasonable for there + * to be no locker; this is not an error. + */ + LOCKER_LOCK(lt, region, locker, ndx); + if ((ret = __lock_getlocker(lt, + locker, ndx, 0, &sh_locker)) != 0 || + sh_locker == NULL || + F_ISSET(sh_locker, DB_LOCKER_DELETED)) + /* + * If ret is set, then we'll generate an + * error. 
If it's not set, we have nothing + * to do. + */ + break; + F_SET(sh_locker, DB_LOCKER_DELETED); + + /* Now traverse the locks, releasing each one. */ + for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); + lp != NULL; + lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) { + SH_LIST_REMOVE(lp, locker_links, __db_lock); + sh_obj = + (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); + SHOBJECT_LOCK(lt, region, sh_obj, lndx); + ret = __lock_put_internal(lt, + lp, lndx, DB_LOCK_FREE | DB_LOCK_DOALL); + if (ret != 0) + break; + } + ret = __lock_checklocker(lt, + NULL, locker, DB_LOCK_IGNOREDEL, NULL); + break; + case DB_LOCK_PUT_OBJ: + /* Remove all the locks associated with an object. */ + OBJECT_LOCK(lt, region, list[i].obj, ndx); + if ((ret = __lock_getobj(lt, list[i].obj, + ndx, 0, &sh_obj)) != 0 || sh_obj == NULL) { + if (ret == 0) + ret = EINVAL; + break; + } + + /* + * Go through both waiters and holders. Don't bother + * to run promotion, because everyone is getting + * released. The processes waiting will still get + * awakened as their waiters are released. + */ + for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock); + ret == 0 && lp != NULL; + lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) + ret = __lock_put_internal(lt, + lp, ndx, DB_LOCK_NOPROMOTE | DB_LOCK_DOALL); + + /* + * On the last time around, the object will get + * reclaimed by __lock_put_internal, structure the + * loop carefully so we do not get bitten. + */ + for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); + ret == 0 && lp != NULL; + lp = next_lock) { + next_lock = SH_TAILQ_NEXT(lp, links, __db_lock); + ret = __lock_put_internal(lt, + lp, ndx, DB_LOCK_NOPROMOTE | DB_LOCK_DOALL); + } + break; +#ifdef DEBUG + case DB_LOCK_DUMP: + /* Find the locker. */ + LOCKER_LOCK(lt, region, locker, ndx); + if ((ret = __lock_getlocker(lt, + locker, ndx, 0, &sh_locker)) != 0 + || sh_locker == NULL + || F_ISSET(sh_locker, DB_LOCKER_DELETED)) + break; + + for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); + lp != NULL; + lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) { + __lock_printlock(lt, lp, 1); + } + break; +#endif + default: + __db_err(dbenv, + "Invalid lock operation: %d", list[i].op); + ret = EINVAL; + break; + } + + if (ret == 0 && region->need_dd && region->detect != DB_LOCK_NORUN) { + run_dd = 1; + region->need_dd = 0; + } + UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); + + if (run_dd) + (void)lock_detect(dbenv, 0, region->detect, &did_abort); + + if (ret != 0 && elistp != NULL) + *elistp = &list[i - 1]; + + return (ret); +} + +/* + * Lock acquisition routines. There are two library interfaces: + * + * lock_get -- + * original lock get interface that takes a locker id. + * + * All the work for lock_get (and for the GET option of lock_vec) is done + * inside of lock_get_internal. + */ +int +lock_get(dbenv, locker, flags, obj, lock_mode, lock) + DB_ENV *dbenv; + u_int32_t locker, flags; + const DBT *obj; + db_lockmode_t lock_mode; + DB_LOCK *lock; +{ + int ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_lock_get(dbenv, locker, + flags, obj, lock_mode, lock)); +#endif + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + + if (IS_RECOVERING(dbenv)) { + lock->off = LOCK_INVALID; + return (0); + } + + /* Validate arguments. 
*/ + if ((ret = __db_fchk(dbenv, + "lock_get", flags, + DB_LOCK_NOWAIT | DB_LOCK_UPGRADE | DB_LOCK_SWITCH)) != 0) + return (ret); + + LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); + ret = __lock_get_internal(dbenv->lk_handle, + locker, flags, obj, lock_mode, lock); + UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); + return (ret); +} + +static int +__lock_get_internal(lt, locker, flags, obj, lock_mode, lock) + DB_LOCKTAB *lt; + u_int32_t locker, flags; + const DBT *obj; + db_lockmode_t lock_mode; + DB_LOCK *lock; +{ + struct __db_lock *newl, *lp; + DB_ENV *dbenv; + DB_LOCKER *sh_locker; + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + u_int32_t locker_ndx; + int did_abort, freed, ihold, on_locker_list, no_dd, ret; + + no_dd = ret = 0; + on_locker_list = 0; + region = lt->reginfo.primary; + dbenv = lt->dbenv; + + /* + * If we are not going to reuse this lock, initialize + * the offset to invalid so that if we fail it + * will not look like a valid lock. + */ + if (!LF_ISSET(DB_LOCK_UPGRADE | DB_LOCK_SWITCH)) + lock->off = LOCK_INVALID; + + /* + * Check that the lock mode is valid. + */ + if ((u_int32_t)lock_mode >= region->nmodes) { + __db_err(dbenv, + "lock_get: invalid lock mode %lu\n", (u_long)lock_mode); + return (EINVAL); + } + + /* Allocate a new lock. Optimize for the common case of a grant. */ + region->nrequests++; + if ((newl = SH_TAILQ_FIRST(®ion->free_locks, __db_lock)) != NULL) + SH_TAILQ_REMOVE(®ion->free_locks, newl, links, __db_lock); + if (newl == NULL) { + __db_err(dbenv, __db_lock_err, "locks"); + return (ENOMEM); + } + if (++region->nlocks > region->maxnlocks) + region->maxnlocks = region->nlocks; + + /* Allocate a new object. */ + OBJECT_LOCK(lt, region, obj, lock->ndx); + if ((ret = __lock_getobj(lt, obj, lock->ndx, 1, &sh_obj)) != 0) + goto err; + + /* Get the locker, we may need it to find our parent. */ + LOCKER_LOCK(lt, region, locker, locker_ndx); + if ((ret = + __lock_getlocker(lt, locker, locker_ndx, 1, &sh_locker)) != 0) { + /* + * XXX: Margo + * CLEANUP the object and the lock. + */ + return (ret); + } + + /* + * Now we have a lock and an object and we need to see if we should + * grant the lock. We use a FIFO ordering so we can only grant a + * new lock if it does not conflict with anyone on the holders list + * OR anyone on the waiters list. The reason that we don't grant if + * there's a conflict is that this can lead to starvation (a writer + * waiting on a popularly read item will never be granted). The + * downside of this is that a waiting reader can prevent an upgrade + * from reader to writer, which is not uncommon. + * + * There is one exception to the no-conflict rule. If a lock is held + * by the requesting locker AND the new lock does not conflict with + * any other holders, then we grant the lock. The most common place + * this happens is when the holder has a WRITE lock and a READ lock + * request comes in for the same locker. If we do not grant the read + * lock, then we guarantee deadlock. + * + * In case of conflict, we put the new lock on the end of the waiters + * list, unless we are upgrading in which case the locker goes on the + * front of the list. 
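    /*
     * Sketch of the conflict test applied while walking the holders list,
     * assuming the row-major layout documented in lock_conflict.c later in
     * this changeset (row = mode already held, column = mode requested).
     * The real code uses the CONFLICTS() macro against the region's
     * conflict table, and skips holders owned by the requesting locker or
     * one of its ancestors entirely -- that is the exception that lets a
     * READ request be granted to a locker already holding the WRITE lock.
     */
    static int
    ex_conflicts(const u_int8_t *tab, u_int32_t nmodes,
        u_int32_t held_mode, u_int32_t req_mode)
    {
        return (tab[held_mode * nmodes + req_mode] != 0);
    }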
+ */ + ihold = 0; + lp = NULL; + if (LF_ISSET(DB_LOCK_SWITCH)) + goto put_lock; + + for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); + lp != NULL; + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { + if (locker == lp->holder || + __lock_is_parent(lt, lp->holder, sh_locker)) { + if (lp->mode == lock_mode && + lp->status == DB_LSTAT_HELD) { + if (LF_ISSET(DB_LOCK_UPGRADE)) + goto upgrade; + + /* + * Lock is held, so we can increment the + * reference count and return this lock. + */ + lp->refcount++; + lock->off = R_OFFSET(<->reginfo, lp); + lock->gen = lp->gen; + + ret = 0; + goto done; + } else + ihold = 1; + } else if (CONFLICTS(lt, region, lp->mode, lock_mode)) + break; + } + + /* + * Make the new lock point to the new object, initialize fields. + * + * This lock is not linked in anywhere, so we can muck with it + * without holding any mutexes. + */ +put_lock: + newl->holder = locker; + newl->refcount = 1; + newl->mode = lock_mode; + newl->obj = SH_PTR_TO_OFF(newl, sh_obj); + newl->status = DB_LSTAT_HELD; + + /* + * If we are upgrading, then there are two scenarios. Either + * we had no conflicts, so we can do the upgrade. Or, there + * is a conflict and we should wait at the HEAD of the waiters + * list. + */ + if (LF_ISSET(DB_LOCK_UPGRADE)) { + if (lp == NULL) + goto upgrade; + + /* + * There was a conflict, wait. If this is the first waiter, + * add the object to the deadlock detector's list. + */ + if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) + SH_TAILQ_INSERT_HEAD(®ion->dd_objs, + sh_obj, dd_links, __db_lockobj); + + SH_TAILQ_INSERT_HEAD(&sh_obj->waiters, newl, links, __db_lock); + goto llist; + } + + if (lp == NULL && !ihold) + for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock); + lp != NULL; + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { + if (CONFLICTS(lt, region, lp->mode, lock_mode) && + locker != lp->holder) + break; + } + if (!LF_ISSET(DB_LOCK_SWITCH) && lp == NULL) + SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links); + else if (!LF_ISSET(DB_LOCK_NOWAIT)) { + /* + * If this is the first waiter, add the object to the + * deadlock detector's list. + */ + if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) + SH_TAILQ_INSERT_HEAD(®ion->dd_objs, + sh_obj, dd_links, __db_lockobj); + SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links); + } else { + ret = DB_LOCK_NOTGRANTED; + if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL + && LOCKER_FREEABLE(sh_locker)) + __lock_freelocker( lt, region, sh_locker, locker_ndx); + region->nnowaits++; + goto err; + } + +llist: + /* + * Now, insert the lock onto its locker's list. If the locker does + * not currently hold any locks, there's no reason to run a deadlock + * detector, save that information. + */ + on_locker_list = 1; + no_dd = sh_locker->master_locker == INVALID_ROFF + && SH_LIST_FIRST(&sh_locker->child_locker, __db_locker) == NULL + && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL; + + SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock); + + if (LF_ISSET(DB_LOCK_SWITCH) || lp != NULL) { + if (LF_ISSET(DB_LOCK_SWITCH) && + (ret = __lock_put_nolock(dbenv, + lock, &ihold, DB_LOCK_NOWAITERS)) != 0) + goto err; + /* + * This is really a blocker for the thread. It should be + * initialized locked, so that when we try to acquire it, we + * block. 
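    /*
     * Analogy for the per-lock "blocker" described above, using a POSIX
     * semaphore rather than the library's own MUTEX_LOCK/MUTEX_UNLOCK
     * primitives (a pthread mutex may not be released by a thread other
     * than its owner, so it cannot model this handoff).  The semaphore
     * starts at zero -- "initialized locked" -- so the requesting thread
     * blocks until the granting thread posts it, which is what the wakeup
     * in __lock_promote() accomplishes.
     */
    #include <semaphore.h>

    static sem_t ex_blocker;

    static void ex_init(void)  { (void)sem_init(&ex_blocker, 0, 0); }
    static void ex_wait(void)  { (void)sem_wait(&ex_blocker); }   /* requester blocks */
    static void ex_grant(void) { (void)sem_post(&ex_blocker); }   /* granter wakes it */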
+ */ + newl->status = DB_LSTAT_WAITING; + region->nconflicts++; + if (region->detect == DB_LOCK_NORUN) + region->need_dd = 1; + UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); + + /* + * We are about to wait; before waiting, see if the deadlock + * detector should be run. + */ + if (region->detect != DB_LOCK_NORUN && !no_dd) + (void)lock_detect(dbenv, 0, region->detect, &did_abort); + + MUTEX_LOCK(dbenv, &newl->mutex, dbenv->lockfhp); + LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); + + if (newl->status != DB_LSTAT_PENDING) { + (void)__lock_checklocker(lt, + newl, newl->holder, 0, &freed); + switch (newl->status) { + case DB_LSTAT_ABORTED: + on_locker_list = 0; + ret = DB_LOCK_DEADLOCK; + break; + case DB_LSTAT_NOGRANT: + ret = DB_LOCK_NOTGRANTED; + break; + default: + ret = EINVAL; + break; + } + goto err; + } else if (LF_ISSET(DB_LOCK_UPGRADE)) { + /* + * The lock that was just granted got put on the + * holders list. Since we're upgrading some other + * lock, we've got to remove it here. + */ + SH_TAILQ_REMOVE( + &sh_obj->holders, newl, links, __db_lock); + /* + * Ensure that the object is not believed to be on + * the object's lists, if we're traversing by locker. + */ + newl->links.stqe_prev = -1; + goto upgrade; + } else + newl->status = DB_LSTAT_HELD; + } + + lock->off = R_OFFSET(<->reginfo, newl); + lock->gen = newl->gen; + + return (0); + +upgrade:/* + * This was an upgrade, so return the new lock to the free list and + * upgrade the mode of the original lock. + */ + ((struct __db_lock *)R_ADDR(<->reginfo, lock->off))->mode = lock_mode; + + ret = 0; + /* FALLTHROUGH */ + +done: +err: newl->status = DB_LSTAT_FREE; + if (on_locker_list) { + SH_LIST_REMOVE(newl, locker_links, __db_lock); + } + SH_TAILQ_INSERT_HEAD(®ion->free_locks, newl, links, __db_lock); + region->nlocks--; + return (ret); +} + +/* + * Lock release routines. + * + * The user callable one is lock_put and the three we use internally are + * __lock_put_nolock, __lock_put_internal and __lock_downgrade. + */ +int +lock_put(dbenv, lock) + DB_ENV *dbenv; + DB_LOCK *lock; +{ + DB_LOCKTAB *lt; + int ret, run_dd; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_lock_put(dbenv, lock)); +#endif + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + + if (IS_RECOVERING(dbenv)) + return (0); + + lt = dbenv->lk_handle; + + LOCKREGION(dbenv, lt); + ret = __lock_put_nolock(dbenv, lock, &run_dd, 0); + UNLOCKREGION(dbenv, lt); + + if (ret == 0 && run_dd) + (void)lock_detect(dbenv, 0, + ((DB_LOCKREGION *)lt->reginfo.primary)->detect, NULL); + return (ret); +} + +static int +__lock_put_nolock(dbenv, lock, runp, flags) + DB_ENV *dbenv; + DB_LOCK *lock; + int *runp; + int flags; +{ + struct __db_lock *lockp; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + u_int32_t locker; + int ret; + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + lockp = (struct __db_lock *)R_ADDR(<->reginfo, lock->off); + lock->off = LOCK_INVALID; + if (lock->gen != lockp->gen) { + __db_err(dbenv, __db_lock_invalid, "lock_put"); + return (EACCES); + } + + locker = lockp->holder; + ret = __lock_put_internal(lt, + lockp, lock->ndx, flags | DB_LOCK_UNLINK | DB_LOCK_FREE); + + *runp = 0; + if (ret == 0 && region->need_dd && region->detect != DB_LOCK_NORUN) { + *runp = 1; + region->need_dd = 0; + } + + return (ret); +} + +/* + * __lock_downgrade -- + * Used by the concurrent access product to downgrade write locks + * back to iwrite locks. 
+ * + * PUBLIC: int __lock_downgrade __P((DB_ENV *, + * PUBLIC: DB_LOCK *, db_lockmode_t, u_int32_t)); + */ +int +__lock_downgrade(dbenv, lock, new_mode, flags) + DB_ENV *dbenv; + DB_LOCK *lock; + db_lockmode_t new_mode; + u_int32_t flags; +{ + struct __db_lock *lockp; + DB_LOCKOBJ *obj; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + int ret; + + COMPQUIET(flags, 0); + + PANIC_CHECK(dbenv); + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + LOCKREGION(dbenv, lt); + + lockp = (struct __db_lock *)R_ADDR(<->reginfo, lock->off); + if (lock->gen != lockp->gen) { + __db_err(dbenv, __db_lock_invalid, "lock_downgrade"); + ret = EACCES; + goto out; + } + + lockp->mode = new_mode; + + /* Get the object associated with this lock. */ + obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); + (void)__lock_promote(lt, obj, LF_ISSET(DB_LOCK_NOWAITERS)); + + ++region->nreleases; +out: UNLOCKREGION(dbenv, lt); + + return (0); +} + +static int +__lock_put_internal(lt, lockp, obj_ndx, flags) + DB_LOCKTAB *lt; + struct __db_lock *lockp; + u_int32_t obj_ndx; + u_int32_t flags; +{ + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + int no_reclaim, ret, state_changed; + + region = lt->reginfo.primary; + no_reclaim = ret = state_changed = 0; + + if (!OBJ_LINKS_VALID(lockp)) { + /* + * Someone removed this lock while we were doing a release + * by locker id. We are trying to free this lock, but it's + * already been done; all we need to do is return it to the + * free list. + */ + lockp->status = DB_LSTAT_FREE; + SH_TAILQ_INSERT_HEAD( + ®ion->free_locks, lockp, links, __db_lock); + region->nlocks--; + return (0); + } + + if (LF_ISSET(DB_LOCK_DOALL)) + region->nreleases += lockp->refcount; + else + region->nreleases++; + + if (!LF_ISSET(DB_LOCK_DOALL) && lockp->refcount > 1) { + lockp->refcount--; + return (0); + } + + /* Increment generation number. */ + lockp->gen++; + + /* Get the object associated with this lock. */ + sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); + + /* Remove this lock from its holders/waitlist. */ + if (lockp->status != DB_LSTAT_HELD) + __lock_remove_waiter(lt->dbenv, sh_obj, lockp, DB_LSTAT_FREE); + else { + SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock); + lockp->links.stqe_prev = -1; + } + + if (LF_ISSET(DB_LOCK_NOPROMOTE)) + state_changed = 0; + else + state_changed = + __lock_promote(lt, sh_obj, LF_ISSET(DB_LOCK_NOWAITERS)); + + if (LF_ISSET(DB_LOCK_UNLINK)) + ret = __lock_checklocker(lt, lockp, lockp->holder, flags, NULL); + + /* Check if object should be reclaimed. */ + if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL + && SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) { + HASHREMOVE_EL(lt->obj_tab, + obj_ndx, __db_lockobj, links, sh_obj); + if (sh_obj->lockobj.size > sizeof(sh_obj->objdata)) + __db_shalloc_free(lt->reginfo.addr, + SH_DBT_PTR(&sh_obj->lockobj)); + SH_TAILQ_INSERT_HEAD( + ®ion->free_objs, sh_obj, links, __db_lockobj); + region->nobjects--; + state_changed = 1; + } + + /* Free lock. */ + if (!LF_ISSET(DB_LOCK_UNLINK) && LF_ISSET(DB_LOCK_FREE)) { + lockp->status = DB_LSTAT_FREE; + SH_TAILQ_INSERT_HEAD( + ®ion->free_locks, lockp, links, __db_lock); + region->nlocks--; + } + + /* + * If we did not promote anyone; we need to run the deadlock + * detector again. + */ + if (state_changed == 0) + region->need_dd = 1; + + return (ret); +} + +/* + * Utility functions; listed alphabetically. + */ + +/* + * __lock_checklocker -- + * If a locker has no more locks, then we can free the object. 
+ * Return a boolean indicating whether we freed the object or not. + * + * Must be called without the locker's lock set. + */ +static int +__lock_checklocker(lt, lockp, locker, flags, freed) + DB_LOCKTAB *lt; + struct __db_lock *lockp; + u_int32_t locker, flags; + int *freed; +{ + DB_ENV *dbenv; + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + u_int32_t indx; + int ret; + + dbenv = lt->dbenv; + region = lt->reginfo.primary; + ret = 0; + + if (freed != NULL) + *freed = 0; + + LOCKER_LOCK(lt, region, locker, indx); + + /* If the locker's list is NULL, free up the locker. */ + if ((ret = __lock_getlocker(lt, + locker, indx, 0, &sh_locker)) != 0 || sh_locker == NULL) { + if (ret == 0) + ret = EACCES; + __db_err(lt->dbenv, __db_locker_invalid); + goto freelock; + } + + if (F_ISSET(sh_locker, DB_LOCKER_DELETED)) { + LF_CLR(DB_LOCK_FREE); + if (!LF_ISSET(DB_LOCK_IGNOREDEL)) + goto freelock; + } + + if (LF_ISSET(DB_LOCK_UNLINK)) + SH_LIST_REMOVE(lockp, locker_links, __db_lock); + + if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL + && LOCKER_FREEABLE(sh_locker)) { + __lock_freelocker( lt, region, sh_locker, indx); + if (freed != NULL) + *freed = 1; + } + +freelock: + if (LF_ISSET(DB_LOCK_FREE)) { + lockp->status = DB_LSTAT_FREE; + SH_TAILQ_INSERT_HEAD( + ®ion->free_locks, lockp, links, __db_lock); + region->nlocks--; + } + + return (ret); +} + +/* + * __lock_addfamilylocker + * Put a locker entry in for a child transaction. + * + * PUBLIC: int __lock_addfamilylocker __P((DB_ENV *, u_int32_t, u_int32_t)); + */ +int +__lock_addfamilylocker(dbenv, pid, id) + DB_ENV *dbenv; + u_int32_t pid, id; +{ + DB_LOCKER *lockerp, *mlockerp; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + u_int32_t ndx; + int ret; + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + LOCKREGION(dbenv, lt); + + /* get/create the parent locker info */ + LOCKER_LOCK(lt, region, pid, ndx); + if ((ret = __lock_getlocker(dbenv->lk_handle, + pid, ndx, 1, &mlockerp)) != 0) + goto err; + + /* + * We assume that only one thread can manipulate + * a single transaction family. + * Therefore the master locker cannot go away while + * we manipulate it, nor can another child in the + * family be created at the same time. + */ + LOCKER_LOCK(lt, region, id, ndx); + if ((ret = __lock_getlocker(dbenv->lk_handle, + id, ndx, 1, &lockerp)) != 0) + goto err; + + /* Point to our parent. */ + lockerp->parent_locker = R_OFFSET(<->reginfo, mlockerp); + + /* See if this locker is the family master. */ + if (mlockerp->master_locker == INVALID_ROFF) + lockerp->master_locker = R_OFFSET(<->reginfo, mlockerp); + else { + lockerp->master_locker = mlockerp->master_locker; + mlockerp = R_ADDR(<->reginfo, mlockerp->master_locker); + } + + /* + * Link the child at the head of the master's list. + * The guess is when looking for deadlock that + * the most recent child is the one thats blocked. + */ + SH_LIST_INSERT_HEAD( + &mlockerp->child_locker, lockerp, child_link, __db_locker); + +err: + UNLOCKREGION(dbenv, lt); + + return (ret); +} + +/* + * __lock_freefamilylocker + * Remove a locker from the hash table and its family. + * + * This must be called without the locker bucket locked. 
+ * + * PUBLIC: int __lock_freefamilylocker __P((DB_LOCKTAB *, u_int32_t)); + */ +int +__lock_freefamilylocker(lt, locker) + DB_LOCKTAB *lt; + u_int32_t locker; +{ + DB_ENV *dbenv; + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + u_int32_t indx; + int ret; + + dbenv = lt->dbenv; + region = lt->reginfo.primary; + + LOCKREGION(dbenv, lt); + LOCKER_LOCK(lt, region, locker, indx); + + if ((ret = __lock_getlocker(lt, + locker, indx, 0, &sh_locker)) != 0 || sh_locker == NULL) { + if (ret == 0) + ret = EACCES; + goto freelock; + } + if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) != NULL) { + ret = EINVAL; + __db_err(dbenv, "Freeing locker with locks"); + goto freelock; + } + + /* If this is part of a family, we must fix up its links. */ + if (sh_locker->master_locker != INVALID_ROFF) + SH_LIST_REMOVE(sh_locker, child_link, __db_locker); + + __lock_freelocker(lt, region, sh_locker, indx); + +freelock: + UNLOCKREGION(dbenv, lt); + return (ret); +} + +/* + * __lock_freelocker + * common code for deleting a locker. + * + * This must be called with the locker bucket locked. + * + * PUBLIC: void __lock_freelocker __P((DB_LOCKTAB *, + * PUBLIC: DB_LOCKREGION *, DB_LOCKER *, u_int32_t)); + */ +void +__lock_freelocker(lt, region, sh_locker, indx) + DB_LOCKTAB *lt; + DB_LOCKREGION *region; + DB_LOCKER *sh_locker; + u_int32_t indx; + +{ + HASHREMOVE_EL( + lt->locker_tab, indx, __db_locker, links, sh_locker); + SH_TAILQ_INSERT_HEAD( + ®ion->free_lockers, sh_locker, links, __db_locker); + region->nlockers--; +} + +/* + * __lock_getlocker -- + * Get a locker in the locker hash table. The create parameter + * indicates if the locker should be created if it doesn't exist in + * the table. + * + * This must be called with the locker bucket locked. + * + * PUBLIC: int __lock_getlocker __P((DB_LOCKTAB *, + * PUBLIC: u_int32_t, u_int32_t, int, DB_LOCKER **)); + */ +int +__lock_getlocker(lt, locker, indx, create, retp) + DB_LOCKTAB *lt; + u_int32_t locker, indx; + int create; + DB_LOCKER **retp; +{ + DB_ENV *dbenv; + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + + dbenv = lt->dbenv; + region = lt->reginfo.primary; + + HASHLOOKUP(lt->locker_tab, + indx, __db_locker, links, locker, sh_locker, __lock_locker_cmp); + + /* + * If we found the locker, then we can just return it. If + * we didn't find the locker, then we need to create it. + */ + if (sh_locker == NULL && create) { + /* Create new locker and then insert it into hash table. */ + if ((sh_locker = SH_TAILQ_FIRST( + ®ion->free_lockers, __db_locker)) == NULL) { + __db_err(lt->dbenv, __db_lock_err, "locker entries"); + return (ENOMEM); + } + SH_TAILQ_REMOVE( + ®ion->free_lockers, sh_locker, links, __db_locker); + if (++region->nlockers > region->maxnlockers) + region->maxnlockers = region->nlockers; + + sh_locker->id = locker; + sh_locker->dd_id = 0; + sh_locker->master_locker = INVALID_ROFF; + sh_locker->parent_locker = INVALID_ROFF; + SH_LIST_INIT(&sh_locker->child_locker); + sh_locker->flags = 0; + SH_LIST_INIT(&sh_locker->heldby); + + HASHINSERT(lt->locker_tab, indx, __db_locker, links, sh_locker); + } + + *retp = sh_locker; + return (0); +} + +/* + * __lock_getobj -- + * Get an object in the object hash table. The create parameter + * indicates if the object should be created if it doesn't exist in + * the table. + * + * This must be called with the object bucket locked. 
+ * + * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *, + * PUBLIC: const DBT *, u_int32_t, int, DB_LOCKOBJ **)); + */ +int +__lock_getobj(lt, obj, ndx, create, retp) + DB_LOCKTAB *lt; + const DBT *obj; + u_int32_t ndx; + int create; + DB_LOCKOBJ **retp; +{ + DB_ENV *dbenv; + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + int ret; + void *p; + + dbenv = lt->dbenv; + region = lt->reginfo.primary; + + /* Look up the object in the hash table. */ + HASHLOOKUP(lt->obj_tab, + ndx, __db_lockobj, links, obj, sh_obj, __lock_cmp); + + /* + * If we found the object, then we can just return it. If + * we didn't find the object, then we need to create it. + */ + if (sh_obj == NULL && create) { + /* Create new object and then insert it into hash table. */ + if ((sh_obj = + SH_TAILQ_FIRST(®ion->free_objs, __db_lockobj)) == NULL) { + __db_err(lt->dbenv, __db_lock_err, "object entries"); + ret = ENOMEM; + goto err; + } + + /* + * If we can fit this object in the structure, do so instead + * of shalloc-ing space for it. + */ + if (obj->size <= sizeof(sh_obj->objdata)) + p = sh_obj->objdata; + else if ((ret = __db_shalloc( + lt->reginfo.addr, obj->size, 0, &p)) != 0) { + __db_err(dbenv, "No space for lock object storage"); + goto err; + } + + memcpy(p, obj->data, obj->size); + + SH_TAILQ_REMOVE( + ®ion->free_objs, sh_obj, links, __db_lockobj); + if (++region->nobjects > region->maxnobjects) + region->maxnobjects = region->nobjects; + + SH_TAILQ_INIT(&sh_obj->waiters); + SH_TAILQ_INIT(&sh_obj->holders); + sh_obj->lockobj.size = obj->size; + sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p); + + HASHINSERT(lt->obj_tab, ndx, __db_lockobj, links, sh_obj); + } + + *retp = sh_obj; + return (0); + +err: return (ret); +} + +/* + * __lock_is_parent -- + * Given a locker and a transaction, return 1 if the locker is + * an ancestor of the designcated transaction. This is used to determine + * if we should grant locks that appear to conflict, but don't because + * the lock is already held by an ancestor. + */ +static int +__lock_is_parent(lt, locker, sh_locker) + DB_LOCKTAB *lt; + u_int32_t locker; + DB_LOCKER *sh_locker; +{ + DB_LOCKER *parent; + + parent = sh_locker; + while (parent->parent_locker != INVALID_ROFF) { + parent = (DB_LOCKER *) + R_ADDR(<->reginfo, parent->parent_locker); + if (parent->id == locker) + return (1); + } + + return (0); +} + +/* + * __lock_promote -- + * + * Look through the waiters and holders lists and decide which (if any) + * locks can be promoted. Promote any that are eligible. + * + * PUBLIC: int __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *, int)); + */ +int +__lock_promote(lt, obj, not_waiters) + DB_LOCKTAB *lt; + DB_LOCKOBJ *obj; + int not_waiters; +{ + struct __db_lock *lp_w, *lp_h, *next_waiter; + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + u_int32_t locker_ndx; + int had_waiters, state_changed; + + region = lt->reginfo.primary; + had_waiters = 0; + + /* + * We need to do lock promotion. We also need to determine if we're + * going to need to run the deadlock detector again. If we release + * locks, and there are waiters, but no one gets promoted, then we + * haven't fundamentally changed the lockmgr state, so we may still + * have a deadlock and we have to run again. However, if there were + * no waiters, or we actually promoted someone, then we are OK and we + * don't have to run it immediately. + * + * During promotion, we look for state changes so we can return this + * information to the caller. 
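    /*
     * Hypothetical restatement of the rule above: the deadlock detector
     * must run again only when the object had waiters and none of them
     * could be promoted, since only then has the release left the
     * lock-manager state unchanged.
     */
    static int
    ex_need_detector_again(int had_waiters, int promoted_someone)
    {
        return (had_waiters && !promoted_someone);
    }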
+ */ + + for (lp_w = SH_TAILQ_FIRST(&obj->waiters, __db_lock), + state_changed = lp_w == NULL; + lp_w != NULL; + lp_w = next_waiter) { + had_waiters = 1; + next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock); + /* Are we switching locks? */ + if (not_waiters && lp_w->mode == DB_LOCK_WAIT) + continue; + for (lp_h = SH_TAILQ_FIRST(&obj->holders, __db_lock); + lp_h != NULL; + lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) { + if (lp_h->holder != lp_w->holder && + CONFLICTS(lt, region, lp_h->mode, lp_w->mode)) { + + LOCKER_LOCK(lt, region, lp_w->holder, locker_ndx); + if ((__lock_getlocker(lt, lp_w->holder, + locker_ndx, 0, &sh_locker)) != 0) { + DB_ASSERT(0); + break; + } + if (!__lock_is_parent(lt, + lp_h->holder, sh_locker)) + break; + } + } + if (lp_h != NULL) /* Found a conflict. */ + break; + + /* No conflict, promote the waiting lock. */ + SH_TAILQ_REMOVE(&obj->waiters, lp_w, links, __db_lock); + lp_w->status = DB_LSTAT_PENDING; + SH_TAILQ_INSERT_TAIL(&obj->holders, lp_w, links); + + /* Wake up waiter. */ + MUTEX_UNLOCK(lt->dbenv, &lp_w->mutex); + state_changed = 1; + } + + /* + * If this object had waiters and doesn't any more, then we need + * to remove it from the dd_obj list. + */ + if (had_waiters && SH_TAILQ_FIRST(&obj->waiters, __db_lock) == NULL) + SH_TAILQ_REMOVE(®ion->dd_objs, obj, dd_links, __db_lockobj); + return (state_changed); +} + +/* + * __lock_remove_waiter -- + * Any lock on the waitlist has a process waiting for it. Therefore, + * we can't return the lock to the freelist immediately. Instead, we can + * remove the lock from the list of waiters, set the status field of the + * lock, and then let the process waking up return the lock to the + * free list. + * + * This must be called with the Object bucket locked. + */ +static void +__lock_remove_waiter(dbenv, sh_obj, lockp, status) + DB_ENV *dbenv; + DB_LOCKOBJ *sh_obj; + struct __db_lock *lockp; + db_status_t status; +{ + int do_wakeup; + + do_wakeup = lockp->status == DB_LSTAT_WAITING; + + SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock); + lockp->links.stqe_prev = -1; + lockp->status = status; + + /* + * Wake whoever is waiting on this lock. + * + * The MUTEX_UNLOCK macro normally resolves to a single argument, + * keep the compiler quiet. 
+ */ + if (do_wakeup) + MUTEX_UNLOCK(dbenv, &lockp->mutex); +} + +/* + * __lock_printlock -- + * + * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int)); + */ +void +__lock_printlock(lt, lp, ispgno) + DB_LOCKTAB *lt; + struct __db_lock *lp; + int ispgno; +{ + DB_LOCKOBJ *lockobj; + db_pgno_t pgno; + u_int32_t *fidp; + u_int8_t *ptr, type; + const char *mode, *status; + + switch (lp->mode) { + case DB_LOCK_IREAD: + mode = "IREAD"; + break; + case DB_LOCK_IWR: + mode = "IWR"; + break; + case DB_LOCK_IWRITE: + mode = "IWRITE"; + break; + case DB_LOCK_NG: + mode = "NG"; + break; + case DB_LOCK_READ: + mode = "READ"; + break; + case DB_LOCK_WRITE: + mode = "WRITE"; + break; + case DB_LOCK_WAIT: + mode = "WAIT"; + break; + default: + mode = "UNKNOWN"; + break; + } + switch (lp->status) { + case DB_LSTAT_ABORTED: + status = "ABORT"; + break; + case DB_LSTAT_ERR: + status = "ERROR"; + break; + case DB_LSTAT_FREE: + status = "FREE"; + break; + case DB_LSTAT_HELD: + status = "HELD"; + break; + case DB_LSTAT_NOGRANT: + status = "NONE"; + break; + case DB_LSTAT_WAITING: + status = "WAIT"; + break; + case DB_LSTAT_PENDING: + status = "PENDING"; + break; + default: + status = "UNKNOWN"; + break; + } + printf("\t%lx\t%s\t%lu\t%s\t", + (u_long)lp->holder, mode, (u_long)lp->refcount, status); + + lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); + ptr = SH_DBT_PTR(&lockobj->lockobj); + if (ispgno && lockobj->lockobj.size == sizeof(struct __db_ilock)) { + /* Assume this is a DBT lock. */ + memcpy(&pgno, ptr, sizeof(db_pgno_t)); + fidp = (u_int32_t *)(ptr + sizeof(db_pgno_t)); + type = *(u_int8_t *)(ptr + sizeof(db_pgno_t) + DB_FILE_ID_LEN); + printf("%s %lu (%lu %lu %lu %lu %lu)\n", + type == DB_PAGE_LOCK ? "page" : "record", + (u_long)pgno, + (u_long)fidp[0], (u_long)fidp[1], (u_long)fidp[2], + (u_long)fidp[3], (u_long)fidp[4]); + } else { + printf("0x%lx ", (u_long)R_OFFSET(<->reginfo, lockobj)); + __db_pr(ptr, lockobj->lockobj.size); + printf("\n"); + } +} diff --git a/bdb/lock/lock_conflict.c b/bdb/lock/lock_conflict.c new file mode 100644 index 00000000000..2d7945fe201 --- /dev/null +++ b/bdb/lock/lock_conflict.c @@ -0,0 +1,34 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: lock_conflict.c,v 11.6 2000/12/12 17:38:13 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" + +/* + * The conflict arrays are set up such that the row is the lock you + * are holding and the column is the lock that is desired. + */ + +const u_int8_t db_riw_conflicts[] = { + /* N S X WT IX IS SIX */ + /* N */ 0, 0, 0, 0, 0, 0, 0, + /* S */ 0, 0, 1, 0, 1, 0, 1, + /* X */ 0, 1, 1, 1, 1, 1, 1, + /* WT */ 0, 0, 0, 0, 0, 0, 0, + /* IX */ 0, 1, 1, 0, 0, 0, 0, + /* IS */ 0, 0, 1, 0, 0, 0, 0, + /* SIX */ 0, 1, 1, 0, 0, 0, 0 +}; diff --git a/bdb/lock/lock_deadlock.c b/bdb/lock/lock_deadlock.c new file mode 100644 index 00000000000..1f37db3890e --- /dev/null +++ b/bdb/lock/lock_deadlock.c @@ -0,0 +1,637 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. 
+ */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: lock_deadlock.c,v 11.23 2000/12/08 20:15:31 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_shash.h" +#include "lock.h" +#include "txn.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +#define ISSET_MAP(M, N) ((M)[(N) / 32] & (1 << (N) % 32)) + +#define CLEAR_MAP(M, N) { \ + u_int32_t __i; \ + for (__i = 0; __i < (N); __i++) \ + (M)[__i] = 0; \ +} + +#define SET_MAP(M, B) ((M)[(B) / 32] |= (1 << ((B) % 32))) +#define CLR_MAP(M, B) ((M)[(B) / 32] &= ~(1 << ((B) % 32))) + +#define OR_MAP(D, S, N) { \ + u_int32_t __i; \ + for (__i = 0; __i < (N); __i++) \ + D[__i] |= S[__i]; \ +} +#define BAD_KILLID 0xffffffff + +typedef struct { + int valid; + u_int32_t id; + u_int32_t last_lock; + u_int32_t last_locker_id; + db_pgno_t pgno; +} locker_info; + +static int __dd_abort __P((DB_ENV *, locker_info *)); +static int __dd_build + __P((DB_ENV *, u_int32_t **, u_int32_t *, locker_info **)); +static int __dd_find + __P((DB_ENV *,u_int32_t *, locker_info *, u_int32_t, u_int32_t ***)); + +#ifdef DIAGNOSTIC +static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t)); +#endif + +int +lock_detect(dbenv, flags, atype, abortp) + DB_ENV *dbenv; + u_int32_t flags, atype; + int *abortp; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + locker_info *idmap; + u_int32_t *bitmap, **deadp, **free_me, i, killid, nentries, nlockers; + int do_pass, ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_lock_detect(dbenv, flags, atype, abortp)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + + lt = dbenv->lk_handle; + if (abortp != NULL) + *abortp = 0; + + /* Validate arguments. */ + if ((ret = + __db_fchk(dbenv, "lock_detect", flags, DB_LOCK_CONFLICT)) != 0) + return (ret); + + /* Check if a detector run is necessary. */ + LOCKREGION(dbenv, lt); + if (LF_ISSET(DB_LOCK_CONFLICT)) { + /* Make a pass every time a lock waits. */ + region = lt->reginfo.primary; + do_pass = region->need_dd != 0; + + if (!do_pass) { + UNLOCKREGION(dbenv, lt); + return (0); + } + } + + /* Build the waits-for bitmap. */ + ret = __dd_build(dbenv, &bitmap, &nlockers, &idmap); + UNLOCKREGION(dbenv, lt); + if (ret != 0) + return (ret); + + if (nlockers == 0) + return (0); +#ifdef DIAGNOSTIC + if (FLD_ISSET(dbenv->verbose, DB_VERB_WAITSFOR)) + __dd_debug(dbenv, idmap, bitmap, nlockers); +#endif + /* Find a deadlock. */ + if ((ret = __dd_find(dbenv, bitmap, idmap, nlockers, &deadp)) != 0) + return (ret); + + nentries = ALIGN(nlockers, 32) / 32; + killid = BAD_KILLID; + free_me = deadp; + for (; *deadp != NULL; deadp++) { + if (abortp != NULL) + ++*abortp; + switch (atype) { /* Kill someone. */ + case DB_LOCK_OLDEST: + /* + * Find the first bit set in the current + * array and then look for a lower tid in + * the array. + */ + for (i = 0; i < nlockers; i++) + if (ISSET_MAP(*deadp, i)) { + killid = i; + break; + + } + /* + * It's conceivable that under XA, the locker could + * have gone away. + */ + if (killid == BAD_KILLID) + break; + + /* + * The oldest transaction has the lowest + * transaction id. 
+ */ + for (i = killid + 1; i < nlockers; i++) + if (ISSET_MAP(*deadp, i) && + idmap[i].id < idmap[killid].id) + killid = i; + break; + case DB_LOCK_DEFAULT: + case DB_LOCK_RANDOM: + /* + * We are trying to calculate the id of the + * locker whose entry is indicated by deadlock. + */ + killid = (*deadp - bitmap) / nentries; + break; + case DB_LOCK_YOUNGEST: + /* + * Find the first bit set in the current + * array and then look for a lower tid in + * the array. + */ + for (i = 0; i < nlockers; i++) + if (ISSET_MAP(*deadp, i)) { + killid = i; + break; + } + + /* + * It's conceivable that under XA, the locker could + * have gone away. + */ + if (killid == BAD_KILLID) + break; + + /* + * The youngest transaction has the highest + * transaction id. + */ + for (i = killid + 1; i < nlockers; i++) + if (ISSET_MAP(*deadp, i) && + idmap[i].id > idmap[killid].id) + killid = i; + break; + default: + killid = BAD_KILLID; + ret = EINVAL; + } + + if (killid == BAD_KILLID) + continue; + + /* Kill the locker with lockid idmap[killid]. */ + if ((ret = __dd_abort(dbenv, &idmap[killid])) != 0) { + /* + * It's possible that the lock was already aborted; + * this isn't necessarily a problem, so do not treat + * it as an error. + */ + if (ret == DB_ALREADY_ABORTED) + ret = 0; + else + __db_err(dbenv, + "warning: unable to abort locker %lx", + (u_long)idmap[killid].id); + } else if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK)) + __db_err(dbenv, + "Aborting locker %lx", (u_long)idmap[killid].id); + } + __os_free(free_me, 0); + __os_free(bitmap, 0); + __os_free(idmap, 0); + + return (ret); +} + +/* + * ======================================================================== + * Utilities + */ + +# define DD_INVALID_ID ((u_int32_t) -1) + +static int +__dd_build(dbenv, bmp, nlockers, idmap) + DB_ENV *dbenv; + u_int32_t **bmp, *nlockers; + locker_info **idmap; +{ + struct __db_lock *lp; + DB_LOCKER *lip, *lockerp, *child; + DB_LOCKOBJ *op, *lo; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + locker_info *id_array; + u_int32_t *bitmap, count, dd, *entryp, i, id, ndx, nentries, *tmpmap; + u_int8_t *pptr; + int is_first, ret; + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + /* + * We'll check how many lockers there are, add a few more in for + * good measure and then allocate all the structures. Then we'll + * verify that we have enough room when we go back in and get the + * mutex the second time. + */ +retry: count = region->nlockers; + region->need_dd = 0; + + if (count == 0) { + *nlockers = 0; + return (0); + } + + if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK)) + __db_err(dbenv, "%lu lockers", (u_long)count); + + count += 40; + nentries = ALIGN(count, 32) / 32; + + /* + * Allocate enough space for a count by count bitmap matrix. + * + * XXX + * We can probably save the malloc's between iterations just + * reallocing if necessary because count grew by too much. + */ + if ((ret = __os_calloc(dbenv, (size_t)count, + sizeof(u_int32_t) * nentries, &bitmap)) != 0) + return (ret); + + if ((ret = __os_calloc(dbenv, + sizeof(u_int32_t), nentries, &tmpmap)) != 0) { + __os_free(bitmap, sizeof(u_int32_t) * nentries); + return (ret); + } + + if ((ret = __os_calloc(dbenv, + (size_t)count, sizeof(locker_info), &id_array)) != 0) { + __os_free(bitmap, count * sizeof(u_int32_t) * nentries); + __os_free(tmpmap, sizeof(u_int32_t) * nentries); + return (ret); + } + + /* + * Now go back in and actually fill in the matrix. 
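    /*
     * Standalone sketch of the waits-for matrix built here: one row of
     * `nentries` 32-bit words per locker, where bit j in row i means
     * "locker i waits for locker j".  The shifts mirror the SET_MAP and
     * ISSET_MAP macros above; the sizes and locker ids are made up.
     */
    #include <stdint.h>
    #include <stdlib.h>

    static int
    ex_waitsfor_demo(void)
    {
        uint32_t nlockers = 40, nentries = (nlockers + 31) / 32;
        uint32_t *bitmap, *row;
        int waits;

        if ((bitmap = calloc((size_t)nlockers * nentries,
            sizeof(uint32_t))) == NULL)
            return (-1);

        row = bitmap + 2 * nentries;            /* row for dd_id 2 */
        row[7 / 32] |= 1u << (7 % 32);          /* locker 2 waits for locker 7 */

        waits = (row[7 / 32] & (1u << (7 % 32))) != 0;
        free(bitmap);
        return (waits);                         /* 1: edge recorded */
    }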
+ */ + if (region->nlockers > count) { + __os_free(bitmap, count * sizeof(u_int32_t) * nentries); + __os_free(tmpmap, sizeof(u_int32_t) * nentries); + __os_free(id_array, count * sizeof(locker_info)); + goto retry; + } + + /* + * First we go through and assign each locker a deadlock detector id. + */ + for (id = 0, i = 0; i < region->locker_t_size; i++) { + for (lip = SH_TAILQ_FIRST(<->locker_tab[i], __db_locker); + lip != NULL; lip = SH_TAILQ_NEXT(lip, links, __db_locker)) + if (lip->master_locker == INVALID_ROFF) { + lip->dd_id = id++; + id_array[lip->dd_id].id = lip->id; + } else + lip->dd_id = DD_INVALID_ID; + } + + /* + * We only need consider objects that have waiters, so we use + * the list of objects with waiters (dd_objs) instead of traversing + * the entire hash table. For each object, we traverse the waiters + * list and add an entry in the waitsfor matrix for each waiter/holder + * combination. + */ + for (op = SH_TAILQ_FIRST(®ion->dd_objs, __db_lockobj); + op != NULL; op = SH_TAILQ_NEXT(op, dd_links, __db_lockobj)) { + CLEAR_MAP(tmpmap, nentries); + + /* + * First we go through and create a bit map that + * represents all the holders of this object. + */ + for (lp = SH_TAILQ_FIRST(&op->holders, __db_lock); + lp != NULL; + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { + LOCKER_LOCK(lt, region, lp->holder, ndx); + if ((ret = __lock_getlocker(lt, + lp->holder, ndx, 0, &lockerp)) != 0) + continue; + if (lockerp->dd_id == DD_INVALID_ID) + dd = ((DB_LOCKER *) + R_ADDR(<->reginfo, + lockerp->master_locker))->dd_id; + else + dd = lockerp->dd_id; + id_array[dd].valid = 1; + + /* + * If the holder has already been aborted, then + * we should ignore it for now. + */ + if (lp->status == DB_LSTAT_HELD) + SET_MAP(tmpmap, dd); + } + + /* + * Next, for each waiter, we set its row in the matrix + * equal to the map of holders we set up above. + */ + for (is_first = 1, + lp = SH_TAILQ_FIRST(&op->waiters, __db_lock); + lp != NULL; + is_first = 0, + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { + LOCKER_LOCK(lt, region, lp->holder, ndx); + if ((ret = __lock_getlocker(lt, + lp->holder, ndx, 0, &lockerp)) != 0) + continue; + if (lockerp->dd_id == DD_INVALID_ID) + dd = ((DB_LOCKER *) + R_ADDR(<->reginfo, + lockerp->master_locker))->dd_id; + else + dd = lockerp->dd_id; + id_array[dd].valid = 1; + + /* + * If the transaction is pending abortion, then + * ignore it on this iteration. + */ + if (lp->status != DB_LSTAT_WAITING) + continue; + + entryp = bitmap + (nentries * dd); + OR_MAP(entryp, tmpmap, nentries); + /* + * If this is the first waiter on the queue, + * then we remove the waitsfor relationship + * with oneself. However, if it's anywhere + * else on the queue, then we have to keep + * it and we have an automatic deadlock. + */ + if (is_first) + CLR_MAP(entryp, dd); + } + } + + /* Now for each locker; record its last lock. */ + for (id = 0; id < count; id++) { + if (!id_array[id].valid) + continue; + LOCKER_LOCK(lt, region, id_array[id].id, ndx); + if ((ret = __lock_getlocker(lt, + id_array[id].id, ndx, 0, &lockerp)) != 0) { + __db_err(dbenv, + "No locks for locker %lu", (u_long)id_array[id].id); + continue; + } + + /* + * If this is a master transaction, try to + * find one of its children's locks first, + * as they are probably more recent. 
+ */ + child = SH_LIST_FIRST(&lockerp->child_locker, __db_locker); + if (child != NULL) { + do { + lp = SH_LIST_FIRST(&child->heldby, __db_lock); + if (lp != NULL && + lp->status == DB_LSTAT_WAITING) { + id_array[id].last_locker_id = child->id; + goto get_lock; + } + child = SH_LIST_NEXT( + child, child_link, __db_locker); + } while (child != NULL); + } + lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock); + if (lp != NULL) { + id_array[id].last_locker_id = lockerp->id; + get_lock: id_array[id].last_lock = R_OFFSET(<->reginfo, lp); + lo = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); + pptr = SH_DBT_PTR(&lo->lockobj); + if (lo->lockobj.size >= sizeof(db_pgno_t)) + memcpy(&id_array[id].pgno, + pptr, sizeof(db_pgno_t)); + else + id_array[id].pgno = 0; + } + } + + /* Pass complete, reset the deadlock detector bit. */ + region->need_dd = 0; + + /* + * Now we can release everything except the bitmap matrix that we + * created. + */ + *nlockers = id; + *idmap = id_array; + *bmp = bitmap; + __os_free(tmpmap, sizeof(u_int32_t) * nentries); + return (0); +} + +static int +__dd_find(dbenv, bmp, idmap, nlockers, deadp) + DB_ENV *dbenv; + u_int32_t *bmp, nlockers; + locker_info *idmap; + u_int32_t ***deadp; +{ + u_int32_t i, j, k, nentries, *mymap, *tmpmap; + u_int32_t **retp; + int ndead, ndeadalloc, ret; + +#undef INITIAL_DEAD_ALLOC +#define INITIAL_DEAD_ALLOC 8 + + ndeadalloc = INITIAL_DEAD_ALLOC; + ndead = 0; + if ((ret = __os_malloc(dbenv, + ndeadalloc * sizeof(u_int32_t *), NULL, &retp)) != 0) + return (ret); + + /* + * For each locker, OR in the bits from the lockers on which that + * locker is waiting. + */ + nentries = ALIGN(nlockers, 32) / 32; + for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nentries) { + if (!idmap[i].valid) + continue; + for (j = 0; j < nlockers; j++) { + if (!ISSET_MAP(mymap, j)) + continue; + + /* Find the map for this bit. */ + tmpmap = bmp + (nentries * j); + OR_MAP(mymap, tmpmap, nentries); + if (!ISSET_MAP(mymap, i)) + continue; + + /* Make sure we leave room for NULL. */ + if (ndead + 2 >= ndeadalloc) { + ndeadalloc <<= 1; + /* + * If the alloc fails, then simply return the + * deadlocks that we already have. + */ + if (__os_realloc(dbenv, + ndeadalloc * sizeof(u_int32_t), + NULL, &retp) != 0) { + retp[ndead] = NULL; + *deadp = retp; + return (0); + } + } + retp[ndead++] = mymap; + + /* Mark all participants in this deadlock invalid. */ + for (k = 0; k < nlockers; k++) + if (ISSET_MAP(mymap, k)) + idmap[k].valid = 0; + break; + } + } + retp[ndead] = NULL; + *deadp = retp; + return (0); +} + +static int +__dd_abort(dbenv, info) + DB_ENV *dbenv; + locker_info *info; +{ + struct __db_lock *lockp; + DB_LOCKER *lockerp; + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + u_int32_t ndx; + int ret; + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + LOCKREGION(dbenv, lt); + /* Find the locker's last lock. */ + LOCKER_LOCK(lt, region, info->last_locker_id, ndx); + if ((ret = __lock_getlocker(lt, + info->last_locker_id, ndx, 0, &lockerp)) != 0 || lockerp == NULL) { + if (ret == 0) + ret = DB_ALREADY_ABORTED; + goto out; + } + + lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock); + + /* + * It's possible that this locker was already aborted. If that's + * the case, make sure that we remove its locker from the hash table. 
+ */ + if (lockp == NULL) { + if (LOCKER_FREEABLE(lockerp)) { + __lock_freelocker(lt, region, lockerp, ndx); + goto out; + } + } else if (R_OFFSET(<->reginfo, lockp) != info->last_lock || + lockp->status != DB_LSTAT_WAITING) { + ret = DB_ALREADY_ABORTED; + goto out; + } + + sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); + SH_LIST_REMOVE(lockp, locker_links, __db_lock); + + /* Abort lock, take it off list, and wake up this lock. */ + SHOBJECT_LOCK(lt, region, sh_obj, ndx); + lockp->status = DB_LSTAT_ABORTED; + SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock); + + /* + * Either the waiters list is now empty, in which case we remove + * it from dd_objs, or it is not empty, in which case we need to + * do promotion. + */ + if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) + SH_TAILQ_REMOVE(®ion->dd_objs, + sh_obj, dd_links, __db_lockobj); + else + ret = __lock_promote(lt, sh_obj, 0); + MUTEX_UNLOCK(dbenv, &lockp->mutex); + + region->ndeadlocks++; + UNLOCKREGION(dbenv, lt); + + return (0); + +out: UNLOCKREGION(dbenv, lt); + return (ret); +} + +#ifdef DIAGNOSTIC +static void +__dd_debug(dbenv, idmap, bitmap, nlockers) + DB_ENV *dbenv; + locker_info *idmap; + u_int32_t *bitmap, nlockers; +{ + u_int32_t i, j, *mymap, nentries; + int ret; + char *msgbuf; + + __db_err(dbenv, "Waitsfor array\nWaiter:\tWaiting on:"); + + /* Allocate space to print 10 bytes per item waited on. */ +#undef MSGBUF_LEN +#define MSGBUF_LEN ((nlockers + 1) * 10 + 64) + if ((ret = __os_malloc(dbenv, MSGBUF_LEN, NULL, &msgbuf)) != 0) + return; + + nentries = ALIGN(nlockers, 32) / 32; + for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) { + if (!idmap[i].valid) + continue; + sprintf(msgbuf, /* Waiter. */ + "%lx/%lu:\t", (u_long)idmap[i].id, (u_long)idmap[i].pgno); + for (j = 0; j < nlockers; j++) + if (ISSET_MAP(mymap, j)) + sprintf(msgbuf, "%s %lx", msgbuf, + (u_long)idmap[j].id); + (void)sprintf(msgbuf, + "%s %lu", msgbuf, (u_long)idmap[i].last_lock); + __db_err(dbenv, msgbuf); + } + + __os_free(msgbuf, MSGBUF_LEN); +} +#endif diff --git a/bdb/lock/lock_method.c b/bdb/lock/lock_method.c new file mode 100644 index 00000000000..46ed9e5166f --- /dev/null +++ b/bdb/lock/lock_method.c @@ -0,0 +1,148 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: lock_method.c,v 11.5 2000/12/21 19:16:42 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_shash.h" +#include "lock.h" + +/* + * __lock_set_lk_conflicts + * Set the conflicts matrix. + * + * PUBLIC: int __lock_set_lk_conflicts __P((DB_ENV *, u_int8_t *, int)); + */ +int +__lock_set_lk_conflicts(dbenv, lk_conflicts, lk_modes) + DB_ENV *dbenv; + u_int8_t *lk_conflicts; + int lk_modes; +{ + int ret; + + ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_conflicts"); + + if (dbenv->lk_conflicts != NULL) { + __os_free(dbenv->lk_conflicts, + dbenv->lk_modes * dbenv->lk_modes); + dbenv->lk_conflicts = NULL; + } + if ((ret = __os_malloc(dbenv, + lk_modes * lk_modes, NULL, &dbenv->lk_conflicts)) != 0) + return (ret); + memcpy(dbenv->lk_conflicts, lk_conflicts, lk_modes * lk_modes); + dbenv->lk_modes = lk_modes; + + return (0); +} + +/* + * __lock_set_lk_detect + * Set the automatic deadlock detection. 
+ * + * PUBLIC: int __lock_set_lk_detect __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_detect(dbenv, lk_detect) + DB_ENV *dbenv; + u_int32_t lk_detect; +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_detect"); + + switch (lk_detect) { + case DB_LOCK_DEFAULT: + case DB_LOCK_OLDEST: + case DB_LOCK_RANDOM: + case DB_LOCK_YOUNGEST: + break; + default: + return (EINVAL); + } + dbenv->lk_detect = lk_detect; + return (0); +} + +/* + * __lock_set_lk_max + * Set the lock table size. + * + * PUBLIC: int __lock_set_lk_max __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_max(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_max"); + + dbenv->lk_max = lk_max; + dbenv->lk_max_objects = lk_max; + dbenv->lk_max_lockers = lk_max; + return (0); +} + +/* + * __lock_set_lk_max_locks + * Set the lock table size. + * + * PUBLIC: int __lock_set_lk_max_locks __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_max_locks(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_max_locks"); + + dbenv->lk_max = lk_max; + return (0); +} + +/* + * __lock_set_lk_max_lockers + * Set the lock table size. + * + * PUBLIC: int __lock_set_lk_max_lockers __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_max_lockers(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_max_lockers"); + + dbenv->lk_max_lockers = lk_max; + return (0); +} + +/* + * __lock_set_lk_max_objects + * Set the lock table size. + * + * PUBLIC: int __lock_set_lk_max_objects __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_max_objects(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_max_objects"); + + dbenv->lk_max_objects = lk_max; + return (0); +} diff --git a/bdb/lock/lock_region.c b/bdb/lock/lock_region.c new file mode 100644 index 00000000000..4bd4ee4b765 --- /dev/null +++ b/bdb/lock/lock_region.c @@ -0,0 +1,430 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: lock_region.c,v 11.41 2000/12/20 21:53:04 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_shash.h" +#include "lock.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +static int __lock_init __P((DB_ENV *, DB_LOCKTAB *)); +static size_t + __lock_region_size __P((DB_ENV *)); + +#ifdef MUTEX_SYSTEM_RESOURCES +static size_t __lock_region_maint __P((DB_ENV *)); +#endif + +/* + * This conflict array is used for concurrent db access (CDB). It + * uses the same locks as the db_rw_conflict array, but adds an IW + * mode to be used for write cursors. + */ +#define DB_LOCK_CDB_N 5 +static u_int8_t const db_cdb_conflicts[] = { + /* N R W WT IW*/ + /* N */ 0, 0, 0, 0, 0, + /* R */ 0, 0, 1, 0, 0, + /* W */ 0, 1, 1, 1, 1, + /* WT */ 0, 0, 0, 0, 0, + /* IW */ 0, 0, 1, 0, 1, +}; + +/* + * __lock_dbenv_create -- + * Lock specific creation of the DB_ENV structure. 
+ * + * PUBLIC: void __lock_dbenv_create __P((DB_ENV *)); + */ +void +__lock_dbenv_create(dbenv) + DB_ENV *dbenv; +{ + dbenv->lk_max = DB_LOCK_DEFAULT_N; + dbenv->lk_max_lockers = DB_LOCK_DEFAULT_N; + dbenv->lk_max_objects = DB_LOCK_DEFAULT_N; + + dbenv->set_lk_conflicts = __lock_set_lk_conflicts; + dbenv->set_lk_detect = __lock_set_lk_detect; + dbenv->set_lk_max = __lock_set_lk_max; + dbenv->set_lk_max_locks = __lock_set_lk_max_locks; + dbenv->set_lk_max_lockers = __lock_set_lk_max_lockers; + dbenv->set_lk_max_objects = __lock_set_lk_max_objects; + +#ifdef HAVE_RPC + /* + * If we have a client, overwrite what we just set up to point + * to the client functions. + */ + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) { + dbenv->set_lk_conflicts = __dbcl_set_lk_conflict; + dbenv->set_lk_detect = __dbcl_set_lk_detect; + dbenv->set_lk_max = __dbcl_set_lk_max; + dbenv->set_lk_max_locks = __dbcl_set_lk_max_locks; + dbenv->set_lk_max_lockers = __dbcl_set_lk_max_lockers; + dbenv->set_lk_max_objects = __dbcl_set_lk_max_objects; + } +#endif +} + +/* + * __lock_dbenv_close -- + * Lock specific destruction of the DB_ENV structure. + * + * PUBLIC: void __lock_dbenv_close __P((DB_ENV *)); + */ +void +__lock_dbenv_close(dbenv) + DB_ENV *dbenv; +{ + if (!F_ISSET(dbenv, DB_ENV_USER_ALLOC) && dbenv->lk_conflicts != NULL) { + __os_free(dbenv->lk_conflicts, + dbenv->lk_modes * dbenv->lk_modes); + dbenv->lk_conflicts = NULL; + } +} + +/* + * __lock_open -- + * Internal version of lock_open: only called from DB_ENV->open. + * + * PUBLIC: int __lock_open __P((DB_ENV *)); + */ +int +__lock_open(dbenv) + DB_ENV *dbenv; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + size_t size; + int ret; + + /* Create the lock table structure. */ + if ((ret = __os_calloc(dbenv, 1, sizeof(DB_LOCKTAB), <)) != 0) + return (ret); + lt->dbenv = dbenv; + + /* Join/create the lock region. */ + lt->reginfo.type = REGION_TYPE_LOCK; + lt->reginfo.id = INVALID_REGION_ID; + lt->reginfo.mode = dbenv->db_mode; + lt->reginfo.flags = REGION_JOIN_OK; + if (F_ISSET(dbenv, DB_ENV_CREATE)) + F_SET(<->reginfo, REGION_CREATE_OK); + size = __lock_region_size(dbenv); + if ((ret = __db_r_attach(dbenv, <->reginfo, size)) != 0) + goto err; + + /* If we created the region, initialize it. */ + if (F_ISSET(<->reginfo, REGION_CREATE)) + if ((ret = __lock_init(dbenv, lt)) != 0) + goto err; + + /* Set the local addresses. */ + region = lt->reginfo.primary = + R_ADDR(<->reginfo, lt->reginfo.rp->primary); + + /* Check for incompatible automatic deadlock detection requests. */ + if (dbenv->lk_detect != DB_LOCK_NORUN) { + if (region->detect != DB_LOCK_NORUN && + dbenv->lk_detect != DB_LOCK_DEFAULT && + region->detect != dbenv->lk_detect) { + __db_err(dbenv, + "lock_open: incompatible deadlock detector mode"); + ret = EINVAL; + goto err; + } + + /* + * Upgrade if our caller wants automatic detection, and it + * was not currently being done, whether or not we created + * the region. + */ + if (region->detect == DB_LOCK_NORUN) + region->detect = dbenv->lk_detect; + } + + /* Set remaining pointers into region. 
*/ + lt->conflicts = (u_int8_t *)R_ADDR(<->reginfo, region->conf_off); + lt->obj_tab = (DB_HASHTAB *)R_ADDR(<->reginfo, region->obj_off); + lt->locker_tab = (DB_HASHTAB *)R_ADDR(<->reginfo, region->locker_off); + + R_UNLOCK(dbenv, <->reginfo); + + dbenv->lk_handle = lt; + return (0); + +err: if (lt->reginfo.addr != NULL) { + if (F_ISSET(<->reginfo, REGION_CREATE)) + ret = __db_panic(dbenv, ret); + R_UNLOCK(dbenv, <->reginfo); + (void)__db_r_detach(dbenv, <->reginfo, 0); + } + __os_free(lt, sizeof(*lt)); + return (ret); +} + +/* + * __lock_init -- + * Initialize the lock region. + */ +static int +__lock_init(dbenv, lt) + DB_ENV *dbenv; + DB_LOCKTAB *lt; +{ + const u_int8_t *lk_conflicts; + struct __db_lock *lp; + DB_LOCKER *lidp; + DB_LOCKOBJ *op; + DB_LOCKREGION *region; +#ifdef MUTEX_SYSTEM_RESOURCES + size_t maint_size; +#endif + u_int32_t i, lk_modes; + u_int8_t *addr; + int ret; + + if ((ret = __db_shalloc(lt->reginfo.addr, + sizeof(DB_LOCKREGION), 0, <->reginfo.primary)) != 0) + goto mem_err; + lt->reginfo.rp->primary = R_OFFSET(<->reginfo, lt->reginfo.primary); + region = lt->reginfo.primary; + memset(region, 0, sizeof(*region)); + + /* Select a conflict matrix if none specified. */ + if (dbenv->lk_modes == 0) + if (CDB_LOCKING(dbenv)) { + lk_modes = DB_LOCK_CDB_N; + lk_conflicts = db_cdb_conflicts; + } else { + lk_modes = DB_LOCK_RIW_N; + lk_conflicts = db_riw_conflicts; + } + else { + lk_modes = dbenv->lk_modes; + lk_conflicts = dbenv->lk_conflicts; + } + + region->id = 0; + region->need_dd = 0; + region->detect = DB_LOCK_NORUN; + region->maxlocks = dbenv->lk_max; + region->maxlockers = dbenv->lk_max_lockers; + region->maxobjects = dbenv->lk_max_objects; + region->locker_t_size = __db_tablesize(dbenv->lk_max_lockers); + region->object_t_size = __db_tablesize(dbenv->lk_max_objects); + region->nmodes = lk_modes; + region->nlocks = 0; + region->maxnlocks = 0; + region->nlockers = 0; + region->maxnlockers = 0; + region->nobjects = 0; + region->maxnobjects = 0; + region->nconflicts = 0; + region->nrequests = 0; + region->nreleases = 0; + region->ndeadlocks = 0; + + /* Allocate room for the conflict matrix and initialize it. */ + if ((ret = + __db_shalloc(lt->reginfo.addr, lk_modes * lk_modes, 0, &addr)) != 0) + goto mem_err; + memcpy(addr, lk_conflicts, lk_modes * lk_modes); + region->conf_off = R_OFFSET(<->reginfo, addr); + + /* Allocate room for the object hash table and initialize it. */ + if ((ret = __db_shalloc(lt->reginfo.addr, + region->object_t_size * sizeof(DB_HASHTAB), 0, &addr)) != 0) + goto mem_err; + __db_hashinit(addr, region->object_t_size); + region->obj_off = R_OFFSET(<->reginfo, addr); + + /* Allocate room for the locker hash table and initialize it. */ + if ((ret = __db_shalloc(lt->reginfo.addr, + region->locker_t_size * sizeof(DB_HASHTAB), 0, &addr)) != 0) + goto mem_err; + __db_hashinit(addr, region->locker_t_size); + region->locker_off = R_OFFSET(<->reginfo, addr); + +#ifdef MUTEX_SYSTEM_RESOURCES + maint_size = __lock_region_maint(dbenv); + /* Allocate room for the locker maintenance info and initialize it. */ + if ((ret = __db_shalloc(lt->reginfo.addr, + sizeof(REGMAINT) + maint_size, 0, &addr)) != 0) + goto mem_err; + __db_maintinit(<->reginfo, addr, maint_size); + region->maint_off = R_OFFSET(<->reginfo, addr); +#endif + + /* + * Initialize locks onto a free list. Initialize and lock the mutex + * so that when we need to block, all we need do is try to acquire + * the mutex. 
+ */ + SH_TAILQ_INIT(®ion->free_locks); + for (i = 0; i < region->maxlocks; ++i) { + if ((ret = __db_shalloc(lt->reginfo.addr, + sizeof(struct __db_lock), MUTEX_ALIGN, &lp)) != 0) + goto mem_err; + lp->status = DB_LSTAT_FREE; + if ((ret = __db_shmutex_init(dbenv, &lp->mutex, + R_OFFSET(<->reginfo, &lp->mutex) + DB_FCNTL_OFF_LOCK, + MUTEX_SELF_BLOCK, <->reginfo, + (REGMAINT *)R_ADDR(<->reginfo, region->maint_off))) != 0) + return (ret); + MUTEX_LOCK(dbenv, &lp->mutex, lt->dbenv->lockfhp); + SH_TAILQ_INSERT_HEAD(®ion->free_locks, lp, links, __db_lock); + } + + /* Initialize objects onto a free list. */ + SH_TAILQ_INIT(®ion->dd_objs); + SH_TAILQ_INIT(®ion->free_objs); + for (i = 0; i < region->maxobjects; ++i) { + if ((ret = __db_shalloc(lt->reginfo.addr, + sizeof(DB_LOCKOBJ), 0, &op)) != 0) + goto mem_err; + SH_TAILQ_INSERT_HEAD( + ®ion->free_objs, op, links, __db_lockobj); + } + + /* Initialize lockers onto a free list. */ + SH_TAILQ_INIT(®ion->free_lockers); + for (i = 0; i < region->maxlockers; ++i) { + if ((ret = __db_shalloc(lt->reginfo.addr, + sizeof(DB_LOCKER), 0, &lidp)) != 0) { +mem_err: __db_err(dbenv, "Unable to allocate memory for the lock table"); + return (ret); + } + SH_TAILQ_INSERT_HEAD( + ®ion->free_lockers, lidp, links, __db_locker); + } + + return (0); +} + +/* + * __lock_close -- + * Internal version of lock_close: only called from db_appinit. + * + * PUBLIC: int __lock_close __P((DB_ENV *)); + */ +int +__lock_close(dbenv) + DB_ENV *dbenv; +{ + DB_LOCKTAB *lt; + int ret; + + lt = dbenv->lk_handle; + + /* Detach from the region. */ + ret = __db_r_detach(dbenv, <->reginfo, 0); + + __os_free(lt, sizeof(*lt)); + + dbenv->lk_handle = NULL; + return (ret); +} + +/* + * __lock_region_size -- + * Return the region size. + */ +static size_t +__lock_region_size(dbenv) + DB_ENV *dbenv; +{ + size_t retval; + + /* + * Figure out how much space we're going to need. This list should + * map one-to-one with the __db_shalloc calls in __lock_init. + */ + retval = 0; + retval += __db_shalloc_size(sizeof(DB_LOCKREGION), 1); + retval += __db_shalloc_size(dbenv->lk_modes * dbenv->lk_modes, 1); + retval += __db_shalloc_size( + __db_tablesize(dbenv->lk_max_lockers) * (sizeof(DB_HASHTAB)), 1); + retval += __db_shalloc_size( + __db_tablesize(dbenv->lk_max_objects) * (sizeof(DB_HASHTAB)), 1); +#ifdef MUTEX_SYSTEM_RESOURCES + retval += + __db_shalloc_size(sizeof(REGMAINT) + __lock_region_maint(dbenv), 1); +#endif + retval += __db_shalloc_size( + sizeof(struct __db_lock), MUTEX_ALIGN) * dbenv->lk_max; + retval += __db_shalloc_size(sizeof(DB_LOCKOBJ), 1) * dbenv->lk_max_objects; + retval += __db_shalloc_size(sizeof(DB_LOCKER), 1) * dbenv->lk_max_lockers; + + /* + * Include 16 bytes of string space per lock. DB doesn't use it + * because we pre-allocate lock space for DBTs in the structure. + */ + retval += __db_shalloc_size(dbenv->lk_max * 16, sizeof(size_t)); + + /* And we keep getting this wrong, let's be generous. */ + retval += retval / 4; + + return (retval); +} + +#ifdef MUTEX_SYSTEM_RESOURCES +/* + * __lock_region_maint -- + * Return the amount of space needed for region maintenance info. + */ +static size_t +__lock_region_maint(dbenv) + DB_ENV *dbenv; +{ + size_t s; + + s = sizeof(MUTEX *) * dbenv->lk_max; + return (s); +} +#endif + +/* + * __lock_region_destroy + * Destroy any region maintenance info. 
+ * + * PUBLIC: void __lock_region_destroy __P((DB_ENV *, REGINFO *)); + */ +void +__lock_region_destroy(dbenv, infop) + DB_ENV *dbenv; + REGINFO *infop; +{ + DB_LOCKREGION *region; + + COMPQUIET(dbenv, NULL); + region = R_ADDR(infop, infop->rp->primary); + + __db_shlocks_destroy(infop, + (REGMAINT *)R_ADDR(infop, region->maint_off)); + return; +} diff --git a/bdb/lock/lock_stat.c b/bdb/lock/lock_stat.c new file mode 100644 index 00000000000..ed5b60d0d7a --- /dev/null +++ b/bdb/lock/lock_stat.c @@ -0,0 +1,308 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: lock_stat.c,v 11.4 2000/12/08 20:15:31 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <ctype.h> +#endif + +#ifdef HAVE_RPC +#include "db_server.h" +#endif + +#include "db_int.h" +#include "db_shash.h" +#include "lock.h" + +#ifdef HAVE_RPC +#include "gen_client_ext.h" +#include "rpc_client_ext.h" +#endif + +static void __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKER *, FILE *)); +static void __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *, FILE *)); +static const char * + __lock_dump_status __P((db_status_t)); + +/* + * lock_stat -- + * Return LOCK statistics. + */ +int +lock_stat(dbenv, statp, db_malloc) + DB_ENV *dbenv; + DB_LOCK_STAT **statp; + void *(*db_malloc) __P((size_t)); +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_LOCK_STAT *stats; + int ret; + +#ifdef HAVE_RPC + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) + return (__dbcl_lock_stat(dbenv, statp, db_malloc)); +#endif + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + + *statp = NULL; + + lt = dbenv->lk_handle; + + if ((ret = __os_malloc(dbenv, sizeof(*stats), db_malloc, &stats)) != 0) + return (ret); + + /* Copy out the global statistics. */ + R_LOCK(dbenv, <->reginfo); + + region = lt->reginfo.primary; + stats->st_lastid = region->id; + stats->st_maxlocks = region->maxlocks; + stats->st_maxlockers = region->maxlockers; + stats->st_maxobjects = region->maxobjects; + stats->st_nmodes = region->nmodes; + stats->st_nlockers = region->nlockers; + stats->st_maxnlockers = region->maxnlockers; + stats->st_nobjects = region->nobjects; + stats->st_maxnobjects = region->maxnobjects; + stats->st_nlocks = region->nlocks; + stats->st_maxnlocks = region->maxnlocks; + stats->st_nconflicts = region->nconflicts; + stats->st_nrequests = region->nrequests; + stats->st_nreleases = region->nreleases; + stats->st_nnowaits = region->nnowaits; + stats->st_ndeadlocks = region->ndeadlocks; + + stats->st_region_wait = lt->reginfo.rp->mutex.mutex_set_wait; + stats->st_region_nowait = lt->reginfo.rp->mutex.mutex_set_nowait; + stats->st_regsize = lt->reginfo.rp->size; + + R_UNLOCK(dbenv, <->reginfo); + + *statp = stats; + return (0); +} + +#define LOCK_DUMP_CONF 0x001 /* Conflict matrix. */ +#define LOCK_DUMP_FREE 0x002 /* Display lock free list. */ +#define LOCK_DUMP_LOCKERS 0x004 /* Display lockers. */ +#define LOCK_DUMP_MEM 0x008 /* Display region memory. */ +#define LOCK_DUMP_OBJECTS 0x010 /* Display objects. */ +#define LOCK_DUMP_ALL 0x01f /* Display all. 
*/ + +/* + * __lock_dump_region -- + * + * PUBLIC: void __lock_dump_region __P((DB_ENV *, char *, FILE *)); + */ +void +__lock_dump_region(dbenv, area, fp) + DB_ENV *dbenv; + char *area; + FILE *fp; +{ + struct __db_lock *lp; + DB_LOCKER *lip; + DB_LOCKOBJ *op; + DB_LOCKREGION *lrp; + DB_LOCKTAB *lt; + u_int32_t flags, i, j; + int label; + + /* Make it easy to call from the debugger. */ + if (fp == NULL) + fp = stderr; + + for (flags = 0; *area != '\0'; ++area) + switch (*area) { + case 'A': + LF_SET(LOCK_DUMP_ALL); + break; + case 'c': + LF_SET(LOCK_DUMP_CONF); + break; + case 'f': + LF_SET(LOCK_DUMP_FREE); + break; + case 'l': + LF_SET(LOCK_DUMP_LOCKERS); + break; + case 'm': + LF_SET(LOCK_DUMP_MEM); + break; + case 'o': + LF_SET(LOCK_DUMP_OBJECTS); + break; + } + + lt = dbenv->lk_handle; + lrp = lt->reginfo.primary; + LOCKREGION(dbenv, lt); + + fprintf(fp, "%s\nLock region parameters\n", DB_LINE); + fprintf(fp, "%s: %lu, %s: %lu, %s: %lu, %s: %lu, %s: %lu, %s: %lu, %s: %lu\n", + "locker table size", (u_long)lrp->locker_t_size, + "object table size", (u_long)lrp->object_t_size, + "obj_off", (u_long)lrp->obj_off, + "osynch_off", (u_long)lrp->osynch_off, + "locker_off", (u_long)lrp->locker_off, + "lsynch_off", (u_long)lrp->lsynch_off, + "need_dd", (u_long)lrp->need_dd); + + if (LF_ISSET(LOCK_DUMP_CONF)) { + fprintf(fp, "\n%s\nConflict matrix\n", DB_LINE); + for (i = 0; i < lrp->nmodes; i++) { + for (j = 0; j < lrp->nmodes; j++) + fprintf(fp, "%lu\t", + (u_long)lt->conflicts[i * lrp->nmodes + j]); + fprintf(fp, "\n"); + } + } + + if (LF_ISSET(LOCK_DUMP_LOCKERS)) { + fprintf(fp, "%s\nLocker hash buckets\n", DB_LINE); + for (i = 0; i < lrp->locker_t_size; i++) { + label = 1; + for (lip = + SH_TAILQ_FIRST(<->locker_tab[i], __db_locker); + lip != NULL; + lip = SH_TAILQ_NEXT(lip, links, __db_locker)) { + if (label) { + fprintf(fp, "Bucket %lu:\n", (u_long)i); + label = 0; + } + __lock_dump_locker(lt, lip, fp); + } + } + } + + if (LF_ISSET(LOCK_DUMP_OBJECTS)) { + fprintf(fp, "%s\nObject hash buckets\n", DB_LINE); + for (i = 0; i < lrp->object_t_size; i++) { + label = 1; + for (op = SH_TAILQ_FIRST(<->obj_tab[i], __db_lockobj); + op != NULL; + op = SH_TAILQ_NEXT(op, links, __db_lockobj)) { + if (label) { + fprintf(fp, "Bucket %lu:\n", (u_long)i); + label = 0; + } + __lock_dump_object(lt, op, fp); + } + } + } + + if (LF_ISSET(LOCK_DUMP_FREE)) { + fprintf(fp, "%s\nLock free list\n", DB_LINE); + for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock); + lp != NULL; + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) + fprintf(fp, "0x%lx: %lu\t%lu\t%s\t0x%lx\n", (u_long)lp, + (u_long)lp->holder, (u_long)lp->mode, + __lock_dump_status(lp->status), (u_long)lp->obj); + + fprintf(fp, "%s\nObject free list\n", DB_LINE); + for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj); + op != NULL; + op = SH_TAILQ_NEXT(op, links, __db_lockobj)) + fprintf(fp, "0x%lx\n", (u_long)op); + + fprintf(fp, "%s\nLocker free list\n", DB_LINE); + for (lip = SH_TAILQ_FIRST(&lrp->free_lockers, __db_locker); + lip != NULL; + lip = SH_TAILQ_NEXT(lip, links, __db_locker)) + fprintf(fp, "0x%lx\n", (u_long)lip); + } + + if (LF_ISSET(LOCK_DUMP_MEM)) + __db_shalloc_dump(lt->reginfo.addr, fp); + + UNLOCKREGION(dbenv, lt); +} + +static void +__lock_dump_locker(lt, lip, fp) + DB_LOCKTAB *lt; + DB_LOCKER *lip; + FILE *fp; +{ + struct __db_lock *lp; + + fprintf(fp, "L %lx [%ld]", (u_long)lip->id, (long)lip->dd_id); + fprintf(fp, " %s ", F_ISSET(lip, DB_LOCKER_DELETED) ? 
"(D)" : " "); + + if ((lp = SH_LIST_FIRST(&lip->heldby, __db_lock)) == NULL) + fprintf(fp, "\n"); + else + for (; lp != NULL; + lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) + __lock_printlock(lt, lp, 1); +} + +static void +__lock_dump_object(lt, op, fp) + DB_LOCKTAB *lt; + DB_LOCKOBJ *op; + FILE *fp; +{ + struct __db_lock *lp; + u_int32_t j; + u_int8_t *ptr; + u_int ch; + + ptr = SH_DBT_PTR(&op->lockobj); + for (j = 0; j < op->lockobj.size; ptr++, j++) { + ch = *ptr; + fprintf(fp, isprint(ch) ? "%c" : "\\%o", ch); + } + fprintf(fp, "\n"); + + fprintf(fp, "H:"); + for (lp = + SH_TAILQ_FIRST(&op->holders, __db_lock); + lp != NULL; + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) + __lock_printlock(lt, lp, 1); + lp = SH_TAILQ_FIRST(&op->waiters, __db_lock); + if (lp != NULL) { + fprintf(fp, "\nW:"); + for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock)) + __lock_printlock(lt, lp, 1); + } +} + +static const char * +__lock_dump_status(status) + db_status_t status; +{ + switch (status) { + case DB_LSTAT_ABORTED: + return ("aborted"); + case DB_LSTAT_ERR: + return ("err"); + case DB_LSTAT_FREE: + return ("free"); + case DB_LSTAT_HELD: + return ("held"); + case DB_LSTAT_NOGRANT: + return ("nogrant"); + case DB_LSTAT_PENDING: + return ("pending"); + case DB_LSTAT_WAITING: + return ("waiting"); + } + return ("unknown status"); +} diff --git a/bdb/lock/lock_util.c b/bdb/lock/lock_util.c new file mode 100644 index 00000000000..fd5c6ad90cb --- /dev/null +++ b/bdb/lock/lock_util.c @@ -0,0 +1,138 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: lock_util.c,v 11.5 2000/07/04 18:28:24 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_shash.h" +#include "hash.h" +#include "lock.h" + +/* + * __lock_cmp -- + * This function is used to compare a DBT that is about to be entered + * into a hash table with an object already in the hash table. Note + * that it just returns true on equal and 0 on not-equal. Therefore + * this function cannot be used as a sort function; its purpose is to + * be used as a hash comparison function. + * + * PUBLIC: int __lock_cmp __P((const DBT *, DB_LOCKOBJ *)); + */ +int +__lock_cmp(dbt, lock_obj) + const DBT *dbt; + DB_LOCKOBJ *lock_obj; +{ + void *obj_data; + + obj_data = SH_DBT_PTR(&lock_obj->lockobj); + return (dbt->size == lock_obj->lockobj.size && + memcmp(dbt->data, obj_data, dbt->size) == 0); +} + +/* + * PUBLIC: int __lock_locker_cmp __P((u_int32_t, DB_LOCKER *)); + */ +int +__lock_locker_cmp(locker, sh_locker) + u_int32_t locker; + DB_LOCKER *sh_locker; +{ + return (locker == sh_locker->id); +} + +/* + * The next two functions are the hash functions used to store objects in the + * lock hash tables. They are hashing the same items, but one (__lock_ohash) + * takes a DBT (used for hashing a parameter passed from the user) and the + * other (__lock_lhash) takes a DB_LOCKOBJ (used for hashing something that is + * already in the lock manager). In both cases, we have a special check to + * fast path the case where we think we are doing a hash on a DB page/fileid + * pair. If the size is right, then we do the fast hash. + * + * We know that DB uses DB_LOCK_ILOCK types for its lock objects. 
The first + * four bytes are the 4-byte page number and the next DB_FILE_ID_LEN bytes + * are a unique file id, where the first 4 bytes on UNIX systems are the file + * inode number, and the first 4 bytes on Windows systems are the FileIndexLow + * bytes. So, we use the XOR of the page number and the first four bytes of + * the file id to produce a 32-bit hash value. + * + * We have no particular reason to believe that this algorithm will produce + * a good hash, but we want a fast hash more than we want a good one, when + * we're coming through this code path. + */ +#define FAST_HASH(P) { \ + u_int32_t __h; \ + u_int8_t *__cp, *__hp; \ + __hp = (u_int8_t *)&__h; \ + __cp = (u_int8_t *)(P); \ + __hp[0] = __cp[0] ^ __cp[4]; \ + __hp[1] = __cp[1] ^ __cp[5]; \ + __hp[2] = __cp[2] ^ __cp[6]; \ + __hp[3] = __cp[3] ^ __cp[7]; \ + return (__h); \ +} + +/* + * __lock_ohash -- + * + * PUBLIC: u_int32_t __lock_ohash __P((const DBT *)); + */ +u_int32_t +__lock_ohash(dbt) + const DBT *dbt; +{ + if (dbt->size == sizeof(DB_LOCK_ILOCK)) + FAST_HASH(dbt->data); + + return (__ham_func5(NULL, dbt->data, dbt->size)); +} + +/* + * __lock_lhash -- + * + * PUBLIC: u_int32_t __lock_lhash __P((DB_LOCKOBJ *)); + */ +u_int32_t +__lock_lhash(lock_obj) + DB_LOCKOBJ *lock_obj; +{ + void *obj_data; + + obj_data = SH_DBT_PTR(&lock_obj->lockobj); + + if (lock_obj->lockobj.size == sizeof(DB_LOCK_ILOCK)) + FAST_HASH(obj_data); + + return (__ham_func5(NULL, obj_data, lock_obj->lockobj.size)); +} + +/* + * __lock_locker_hash -- + * Hash function for entering lockers into the locker hash table. + * Since these are simply 32-bit unsigned integers, just return + * the locker value. + * + * PUBLIC: u_int32_t __lock_locker_hash __P((u_int32_t)); + */ +u_int32_t +__lock_locker_hash(locker) + u_int32_t locker; +{ + return (locker); +} |
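
The deadlock detector in lock_deadlock.c stores the waits-for graph as a flat bitmap: one row of ALIGN(count, 32)/32 32-bit words per locker, laid out back to back, so a row pointer can be turned back into a locker index by pointer arithmetic, which is how the DB_LOCK_DEFAULT/DB_LOCK_RANDOM victim is computed (killid = (*deadp - bitmap) / nentries). The following is a minimal standalone sketch of that layout; it is not BDB source, and SET_BIT/ISSET_BIT are hypothetical stand-ins for the real SET_MAP/ISSET_MAP macros.

#include <stdio.h>
#include <stdlib.h>

#define SET_BIT(map, b)   ((map)[(b) / 32] |= (1U << ((b) % 32)))
#define ISSET_BIT(map, b) ((map)[(b) / 32] &  (1U << ((b) % 32)))

int
main()
{
	unsigned int *bitmap, *row, count, nentries, killid;

	count = 3;			/* number of lockers */
	nentries = (count + 31) / 32;	/* words per row: ALIGN(count, 32) / 32 */
	if ((bitmap = calloc(count * nentries, sizeof(*bitmap))) == NULL)
		return (1);

	/* Locker 2 waits on locker 0: set bit 0 in locker 2's row. */
	row = bitmap + 2 * nentries;
	SET_BIT(row, 0);

	/*
	 * Given only the row pointer (as __dd_find returns), recover the
	 * victim's detector id the same way DB_LOCK_DEFAULT does.
	 */
	killid = (unsigned int)(row - bitmap) / nentries;
	printf("victim row %u, waits on locker 0: %s\n",
	    killid, ISSET_BIT(row, 0) ? "yes" : "no");

	free(bitmap);
	return (0);
}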
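
__dd_find() detects cycles by OR-ing each waited-on locker's row into the waiter's own row and declaring a deadlock as soon as the waiter's own bit appears there; the participants are then marked invalid so each cycle is reported only once. A toy single-word version of that loop, assuming two lockers that wait on each other:

#include <stdio.h>

#define NLOCKERS 2

int
main()
{
	/* Locker 0 waits on locker 1, and locker 1 waits on locker 0. */
	unsigned int rows[NLOCKERS] = { 1U << 1, 1U << 0 };
	unsigned int valid[NLOCKERS] = { 1, 1 };
	unsigned int i, j, k;

	for (i = 0; i < NLOCKERS; i++) {
		if (!valid[i])
			continue;
		for (j = 0; j < NLOCKERS; j++) {
			if (!(rows[i] & (1U << j)))
				continue;
			rows[i] |= rows[j];		/* OR_MAP */
			if (!(rows[i] & (1U << i)))	/* ISSET_MAP(mymap, i) */
				continue;
			printf("deadlock: locker %u is in a cycle\n", i);
			/* Invalidate participants so the cycle prints once. */
			for (k = 0; k < NLOCKERS; k++)
				if (rows[i] & (1U << k))
					valid[k] = 0;
			break;
		}
	}
	return (0);
}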
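
__lock_set_lk_conflicts() in lock_method.c copies an application-supplied modes-by-modes byte matrix into the environment. A hedged usage sketch, assuming the standard db.h application interface and an illustrative three-mode matrix; the call must happen before DB_ENV->open(), per ENV_ILLEGAL_AFTER_OPEN:

#include "db.h"

int
install_conflicts(DB_ENV *dbenv)
{
	/* Illustrative 3-mode matrix: none / read / write. */
	static u_int8_t conflicts[] = {
	/*	 N  R  W */
	/* N */	 0, 0, 0,
	/* R */	 0, 0, 1,
	/* W */	 0, 1, 1,
	};

	/* Must be called before DB_ENV->open(). */
	return (dbenv->set_lk_conflicts(dbenv, conflicts, 3));
}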
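
The remaining setters in lock_method.c configure the victim policy and the three table sizes. A usage sketch with placeholder limits; only DB_LOCK_DEFAULT, DB_LOCK_OLDEST, DB_LOCK_RANDOM and DB_LOCK_YOUNGEST are accepted by set_lk_detect, per the switch above:

#include "db.h"

int
configure_locking(DB_ENV *dbenv)
{
	int ret;

	/* Enable automatic detection; DB_LOCK_DEFAULT uses the default victim policy. */
	if ((ret = dbenv->set_lk_detect(dbenv, DB_LOCK_DEFAULT)) != 0)
		return (ret);

	/* Size the lock, locker and object tables independently (placeholder values). */
	if ((ret = dbenv->set_lk_max_locks(dbenv, 5000)) != 0)
		return (ret);
	if ((ret = dbenv->set_lk_max_lockers(dbenv, 1000)) != 0)
		return (ret);
	return (dbenv->set_lk_max_objects(dbenv, 5000));
}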
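
The conflict matrices (db_cdb_conflicts in lock_region.c, or whatever the application installs) are stored row-major with nmodes columns and indexed as conflicts[i * nmodes + j], which is how __lock_dump_region() prints them. A sketch of that indexing; whether the row or the column is the held mode is decided elsewhere in the lock code and is not asserted here:

#include <stdio.h>

#define DB_LOCK_CDB_N 5

static const unsigned char db_cdb_conflicts[] = {
	/*	 N  R  W  WT IW */
	/* N */	 0, 0, 0, 0, 0,
	/* R */	 0, 0, 1, 0, 0,
	/* W */	 0, 1, 1, 1, 1,
	/* WT */ 0, 0, 0, 0, 0,
	/* IW */ 0, 0, 1, 0, 1,
};

int
main()
{
	unsigned int i, j;

	for (i = 0; i < DB_LOCK_CDB_N; i++) {
		for (j = 0; j < DB_LOCK_CDB_N; j++)
			printf("%d\t",
			    db_cdb_conflicts[i * DB_LOCK_CDB_N + j]);
		printf("\n");
	}
	return (0);
}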
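
lock_stat() in lock_stat.c copies the counters out of the region under the region lock and hands back an allocated DB_LOCK_STAT. A usage sketch, assuming the default allocator is used (db_malloc passed as NULL) so the result can be released with free():

#include <stdio.h>
#include <stdlib.h>
#include "db.h"

int
print_lock_stats(DB_ENV *dbenv)
{
	DB_LOCK_STAT *sp;
	int ret;

	if ((ret = lock_stat(dbenv, &sp, NULL)) != 0)
		return (ret);

	printf("current lockers: %lu (max %lu)\n",
	    (unsigned long)sp->st_nlockers, (unsigned long)sp->st_maxnlockers);
	printf("lock requests: %lu, releases: %lu, deadlocks: %lu\n",
	    (unsigned long)sp->st_nrequests, (unsigned long)sp->st_nreleases,
	    (unsigned long)sp->st_ndeadlocks);

	free(sp);	/* assumes the default allocator was used above */
	return (0);
}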
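
__lock_dump_region() selects what to print from a small area string: 'A' for everything, or any mix of 'c', 'f', 'l', 'm' and 'o'. A standalone sketch of just that flag parsing, reusing the LOCK_DUMP_* values defined in lock_stat.c:

#include <stdio.h>

#define LOCK_DUMP_CONF		0x001	/* Conflict matrix. */
#define LOCK_DUMP_FREE		0x002	/* Lock free list. */
#define LOCK_DUMP_LOCKERS	0x004	/* Lockers. */
#define LOCK_DUMP_MEM		0x008	/* Region memory. */
#define LOCK_DUMP_OBJECTS	0x010	/* Objects. */
#define LOCK_DUMP_ALL		0x01f	/* Everything. */

int
main()
{
	const char *area = "lo";	/* dump lockers and objects */
	const char *p;
	unsigned int flags;

	flags = 0;
	for (p = area; *p != '\0'; ++p)
		switch (*p) {
		case 'A': flags |= LOCK_DUMP_ALL; break;
		case 'c': flags |= LOCK_DUMP_CONF; break;
		case 'f': flags |= LOCK_DUMP_FREE; break;
		case 'l': flags |= LOCK_DUMP_LOCKERS; break;
		case 'm': flags |= LOCK_DUMP_MEM; break;
		case 'o': flags |= LOCK_DUMP_OBJECTS; break;
		}

	printf("area \"%s\" -> flags %#x\n", area, flags);
	return (0);
}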
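
FAST_HASH in lock_util.c XORs the 4-byte page number with the first four bytes of the file id whenever the object is the size of a DB_LOCK_ILOCK. A standalone sketch with a mocked-up struct; the 20-byte file-id length is an assumption about DB_FILE_ID_LEN, and the result is endian-dependent, which is acceptable for a hash:

#include <stdio.h>
#include <string.h>

struct ilock_sketch {			/* stand-in for DB_LOCK_ILOCK */
	unsigned int pgno;		/* 4-byte page number */
	unsigned char fileid[20];	/* unique file id (assumed length) */
};

static unsigned int
fast_hash(const void *p)
{
	unsigned int h;
	unsigned char *hp = (unsigned char *)&h;
	const unsigned char *cp = p;

	/* XOR the page-number bytes with the first four file-id bytes. */
	hp[0] = cp[0] ^ cp[4];
	hp[1] = cp[1] ^ cp[5];
	hp[2] = cp[2] ^ cp[6];
	hp[3] = cp[3] ^ cp[7];
	return (h);
}

int
main()
{
	struct ilock_sketch obj;

	memset(&obj, 0, sizeof(obj));
	obj.pgno = 42;
	memcpy(obj.fileid, "\x01\x02\x03\x04", 4);

	printf("hash = %#x\n", fast_hash(&obj));
	return (0);
}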