author    unknown <tim@threads.polyesthetic.msg>    2001-03-04 19:42:05 -0500
committer unknown <tim@threads.polyesthetic.msg>    2001-03-04 19:42:05 -0500
commit    ec6ae091617bdfdca9e65e8d3e65b950d234f676 (patch)
tree      9dd732e08dba156ee3d7635caedc0dc3107ecac6 /bdb/lock
parent    87d70fb598105b64b538ff6b81eef9da626255b1 (diff)
Import changeset
Diffstat (limited to 'bdb/lock')
-rw-r--r--   bdb/lock/Design            293
-rw-r--r--   bdb/lock/lock.c           1439
-rw-r--r--   bdb/lock/lock_conflict.c    34
-rw-r--r--   bdb/lock/lock_deadlock.c   637
-rw-r--r--   bdb/lock/lock_method.c     148
-rw-r--r--   bdb/lock/lock_region.c     430
-rw-r--r--   bdb/lock/lock_stat.c       308
-rw-r--r--   bdb/lock/lock_util.c       138
8 files changed, 3427 insertions, 0 deletions
diff --git a/bdb/lock/Design b/bdb/lock/Design
new file mode 100644
index 00000000000..ac8f0b02fbf
--- /dev/null
+++ b/bdb/lock/Design
@@ -0,0 +1,293 @@
+# $Id: Design,v 11.3 2000/02/19 20:58:03 bostic Exp $
+
+Synchronization in the Locking Subsystem
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+1. Data structures
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+
+The lock manager maintains 3 different structures:
+
+Objects (__db_lockobj):
+ Describes an object that is locked. When used with DB, this consists
+ of a __db_ilock (a file identifier and a page number).
+
+Lockers (__db_locker):
+ Identifies a specific locker ID and maintains the head of a list of
+	locks held by a locker (for use during transaction commit/abort).
+
+Locks (__db_lock):
+ Describes a particular object lock held on behalf of a particular
+ locker id.
+
+Objects and Lockers reference Locks.
+
+These structures are organized via two synchronized hash tables. Each
+hash table consists of two physical arrays: the array of actual hash
+buckets and an array of mutexes so we can lock individual buckets, rather
+than the whole table.
+
+One hash table contains Objects and the other hash table contains Lockers.
+Objects contain two lists of locks, waiters and holders: holders currently
+hold a lock on the Object; waiters are locks waiting to be granted.
+Lockers are a singly linked list that connects the Locks held on behalf
+of the specific locker ID.
+
+In the diagram below:
+
+Locker ID #1 holds a lock on Object #1 (L1) and Object #2 (L5), and is
+waiting on a lock on Object #1 (L3).
+
+Locker ID #2 holds a lock on Object #1 (L2) and is waiting on a lock for
+Object #2 (L7).
+
+Locker ID #3 is waiting for a lock on Object #2 (L6).
+
+ OBJECT -----------------------
+ HASH | |
+ ----|------------- |
+ ________ _______ | | ________ | |
+ | |-->| O1 |--|---|-->| O2 | | |
+ |_______| |_____| | | |______| V |
+ | | W H--->L1->L2 W H--->L5 | holders
+ |_______| | | | | V
+ | | ------->L3 \ ------->L6------>L7 waiters
+ |_______| / \ \
+ . . / \ \
+ . . | \ \
+ . . | \ -----------
+ |_______| | -------------- |
+ | | ____|____ ___|_____ _|______
+ |_______| | | | | | |
+ | | | LID1 | | LID2 | | LID3 |
+ |_______| |_______| |_______| |______|
+ ^ ^ ^
+ | | |
+ ___|________________________|________|___
+ LOCKER | | | | | | | | |
+ HASH | | | | | | | | |
+ | | | | | | | | |
+ |____|____|____|____|____|____|____|____|
+
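+In rough C terms, the relationships above look like the following sketch
+(a simplification for illustration only; the real declarations live in
+the lock subsystem's headers and link structures together with
+shared-memory offsets rather than pointers):
+
+	struct __db_lockobj {		/* Object */
+		tailq of struct __db_lock holders;
+		tailq of struct __db_lock waiters;
+	};
+
+	struct __db_locker {		/* Locker */
+		u_int32_t id;		/* locker ID */
+		list of struct __db_lock heldby;
+	};
+
+	struct __db_lock {		/* Lock */
+		u_int32_t holder;	/* ID of owning Locker */
+		db_lockmode_t mode;	/* lock mode held/requested */
+		/* links: on its Object's holders or waiters list */
+		/* links: on its Locker's heldby list */
+	};
+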
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+2. Synchronization
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+
+There are four types of mutexes in the subsystem.
+
+Object mutexes:
+ These map one-to-one to each bucket in the Object hash table.
+ Holding a mutex on an Object bucket secures all the Objects in
+ that bucket as well as the Lock structures linked from those
+ Objects. All fields in the Locks EXCEPT the Locker links (the
+ links that attach Locks by Locker ID) are protected by these
+ mutexes.
+
+Locker mutexes:
+ These map one-to-one to each bucket in the Locker hash table.
+ Holding a mutex on a Locker bucket secures the Locker structures
+ and the Locker links in the Locks.
+
+Memory mutex:
+	This mutex protects calls that allocate/free memory, i.e., calls to
+ __db_shalloc and __db_shalloc_free, as well as manipulation of
+ the Object, Locker and Lock free lists.
+
+Region mutex:
+ This mutex is currently only used to protect the locker ids.
+ It may also be needed later to provide exclusive access to
+ the region for deadlock detection.
+
+Creating or removing a Lock requires locking both the Object lock and the
+Locker lock (and eventually the shalloc lock to return the item to the
+free list).
+
+The locking hierarchy is as follows:
+
+ The Region mutex may never be acquired after any other mutex.
+
+ The Object mutex may be acquired after the Region mutex.
+
+ The Locker mutex may be acquired after the Region and Object
+ mutexes.
+
+ The Memory mutex may be acquired after any mutex.
+
+So, if both an Object mutex and a Locker mutex are going to be acquired,
+the Object mutex must be acquired first.
+
+The Memory mutex may be acquired after any other mutex, but no other mutexes
+can be acquired once the Memory mutex is held.
+
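+For example, creating a Lock (see "Get a lock" below) obeys the
+hierarchy by acquiring and releasing mutexes in this order (an ordering
+sketch only):
+
+	acquire(Object bucket mutex)
+	acquire(Locker bucket mutex)
+	acquire(Memory mutex)		/* allocate from the free lists */
+	release(Memory mutex)
+	release(Locker bucket mutex)
+	...
+	release(Object bucket mutex)
+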
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+3. The algorithms
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+The locking subsystem supports four basic operations:
+ Get a Lock (lock_get)
+
+ Release a Lock (lock_put)
+
+ Release all the Locks on a specific Object (lock_vec)
+
+ Release all the Locks for a specific Locker (lock_vec)
+
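+In terms of the public interfaces in lock.c, a typical caller looks like
+this (a minimal sketch; error handling is omitted and the object DBT
+shown is illustrative):
+
+	DB_LOCK lock;
+	DBT obj_dbt;
+	u_int32_t locker;
+
+	memset(&obj_dbt, 0, sizeof(obj_dbt));
+	obj_dbt.data = &ilock;		/* e.g., a __db_ilock */
+	obj_dbt.size = sizeof(ilock);
+
+	(void)lock_id(dbenv, &locker);		/* allocate a locker ID */
+	(void)lock_get(dbenv, locker, 0, &obj_dbt, DB_LOCK_WRITE, &lock);
+	/* ... operate on the locked object ... */
+	(void)lock_put(dbenv, &lock);
+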
+Get a lock:
+ Acquire Object bucket mutex.
+ Acquire Locker bucket mutex.
+
+ Acquire Memory mutex.
+ If the Object does not exist
+ Take an Object off the freelist.
+ If the Locker doesn't exist
+ Take a Locker off the freelist.
+ Take a Lock off the free list.
+ Release Memory mutex.
+
+ Add Lock to the Object list.
+ Add Lock to the Locker list.
+ Release Locker bucket mutex
+
+ If the lock cannot be granted
+ Release Object bucket mutex
+ Acquire lock mutex (blocks)
+
+ Acquire Object bucket mutex
+	If lock acquisition did not succeed (e.g., deadlock)
+ Acquire Locker bucket mutex
+ If locker should be destroyed
+ Remove locker from hash table
+ Acquire Memory mutex
+ Return locker to free list
+ Release Memory mutex
+ Release Locker bucket mutex
+
+ If object should be released
+ Acquire Memory mutex
+ Return object to free list
+ Release Memory mutex
+
+ Release Object bucket mutex
+
+Release a lock:
+ Acquire Object bucket mutex.
+ (Requires that we be able to find the Object hash bucket
+ without looking inside the Lock itself.)
+
+	If releasing a single lock and the user-provided generation number
+	doesn't match the Lock's generation number, the Lock has been reused
+	and we return failure.
+
+ Enter lock_put_internal:
+ if the Lock is still on the Object's lists:
+ Increment Lock's generation number.
+ Remove Lock from the Object's list (NULL link fields).
+ Promote locks for the Object.
+
+ Enter locker_list_removal
+ Acquire Locker bucket mutex.
+ If Locker doesn't exist:
+ Release Locker bucket mutex
+ Release Object bucket mutex
+ Return error.
+ Else if Locker marked as deleted:
+ dont_release = TRUE
+ Else
+ Remove Lock from Locker list.
+ If Locker has no more locks
+ Remove Locker from table.
+ Acquire Memory mutex.
+ Return Locker to free list
+ Release Memory mutex
+ Release Locker bucket mutex.
+ Exit locker_list_removal
+
+ If (!dont_release)
+ Acquire Memory mutex
+ Return Lock to free list
+ Release Memory mutex
+
+ Exit lock_put_internal
+
+ Release Object bucket mutex
+
+Release all the Locks on a specific Object (lock_vec, DB_LOCK_PUT_OBJ):
+
+ Acquire Object bucket mutex.
+
+ For each lock on the waiter list:
+ lock_put_internal
+ For each lock on the holder list:
+ lock_put_internal
+
+ Release Object bucket mutex.
+
+Release all the Locks for a specific Locker (lock_vec, DB_LOCK_PUT_ALL):
+
+ Acquire Locker bucket mutex.
+ Mark Locker deleted.
+ Release Locker mutex.
+
+ For each lock on the Locker's list:
+ Remove from locker's list
+ (The lock could get put back on the free list in
+ lock_put and then could get reallocated and the
+ act of setting its locker links could clobber us.)
+ Perform "Release a Lock" above: skip locker_list_removal.
+
+ Acquire Locker bucket mutex.
+ Remove Locker
+ Release Locker mutex.
+
+ Acquire Memory mutex
+ Return Locker to free list
+ Release Memory mutex
+
+Deadlock detection (lock_detect):
+
+ For each bucket in Object table
+ Acquire the Object bucket mutex.
+ create waitsfor
+
+ For each bucket in Object table
+ Release the Object mutex.
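+
+A caller typically kicks the detector via lock_detect (a sketch; the
+atype argument selects the victim policy, e.g. DB_LOCK_DEFAULT,
+DB_LOCK_OLDEST or DB_LOCK_YOUNGEST):
+
+	int aborted;
+
+	(void)lock_detect(dbenv, 0, DB_LOCK_DEFAULT, &aborted);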
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+FAQ:
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+Q: Why do you need generation numbers?
+A: If a lock has been released due to a transaction abort (potentially in a
+   different process), and the lock is then released again by a thread of
+   control unaware of the abort, the lock might have been re-allocated to a
+   different object. The generation numbers detect this problem.
+
+   Note, we assume that reads/writes of lock generation numbers are atomic;
+   if they are not, it is theoretically possible that a re-allocated lock
+   could be mistaken for another lock.
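+
+   The check itself is cheap; __lock_put_nolock in lock.c implements it
+   essentially as:
+
+	if (lock->gen != lockp->gen) {
+		/* The lock was released and reused; fail the request. */
+		return (EACCES);
+	}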
+
+Q: Why is it safe to walk the Locker list without holding any mutexes at
+   all?
+A: Locks are created with both the Object and Locker bucket mutexes held.
+   Once created, they are removed in two ways:
+
+ a) when a specific Lock is released, in which case, the Object and
+ Locker bucket mutexes are again held, and
+
+   b) when all Locks for a specific Locker ID are released.
+
+ In case b), the Locker bucket mutex is held while the Locker chain is
+ marked as "destroyed", which blocks any further access to the Locker
+ chain. Then, each individual Object bucket mutex is acquired when each
+ individual Lock is removed.
+
+Q: What are the implications of doing fine grain locking?
+
+A: Since we no longer globally lock the entire region, lock_vec will no
+ longer be atomic. We still execute the items in a lock_vec in order,
+ so things like lock-coupling still work, but you can't make any
+ guarantees about atomicity.
+
+Q: How do I configure for FINE_GRAIN locking?
+
+A: We currently do not support any automatic configuration for FINE_GRAIN
+   locking. When we do, we will need to document the atomicity discussion
+   above (it is bug-report #553).
diff --git a/bdb/lock/lock.c b/bdb/lock/lock.c
new file mode 100644
index 00000000000..8d246f7ded3
--- /dev/null
+++ b/bdb/lock/lock.c
@@ -0,0 +1,1439 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: lock.c,v 11.40 2000/12/19 23:18:58 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "lock.h"
+#include "log.h"
+#include "db_am.h"
+#include "txn.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+static int __lock_checklocker __P((DB_LOCKTAB *,
+ struct __db_lock *, u_int32_t, u_int32_t, int *));
+static int __lock_get_internal __P((DB_LOCKTAB *, u_int32_t,
+ u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *));
+static int __lock_is_parent __P((DB_LOCKTAB *, u_int32_t, DB_LOCKER *));
+static int __lock_put_internal __P((DB_LOCKTAB *,
+ struct __db_lock *, u_int32_t, u_int32_t));
+static int __lock_put_nolock __P((DB_ENV *, DB_LOCK *, int *, int));
+static void __lock_remove_waiter __P((DB_ENV *,
+ DB_LOCKOBJ *, struct __db_lock *, db_status_t));
+
+static const char __db_lock_err[] = "Lock table is out of available %s";
+static const char __db_lock_invalid[] = "%s: Lock is no longer valid";
+static const char __db_locker_invalid[] = "Locker is not valid";
+
+/*
+ * lock_id --
+ * Generate a unique locker id.
+ */
+int
+lock_id(dbenv, idp)
+ DB_ENV *dbenv;
+ u_int32_t *idp;
+{
+ DB_LOCKTAB *lt;
+ DB_LOCKREGION *region;
+
+#ifdef HAVE_RPC
+ if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+ return (__dbcl_lock_id(dbenv, idp));
+#endif
+
+ PANIC_CHECK(dbenv);
+ ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK);
+
+ lt = dbenv->lk_handle;
+ region = lt->reginfo.primary;
+
+ /*
+ * Note that we are letting locker IDs wrap.
+ *
+ * This is potentially dangerous in that it's conceivable that you
+ * could be allocating a new locker id and still have someone using
+ * it. However, the alternatives are that we keep a bitmap of
+ * locker ids or we forbid wrapping. Both are probably bad. The
+ * bitmap of locker ids will take up 64 MB of space. Forbidding
+ * wrapping means that we'll run out of locker IDs after 2 billion.
+ * In order for the wrap bug to fire, we'd need to have something
+ * that stayed open while 2 billion locker ids were used up. Since
+ * we cache cursors it means that something would have to stay open
+ * sufficiently long that we open and close a lot of files and a
+ * lot of cursors within them. Betting that this won't happen seems
+ * to be the lesser of the evils.
+ */
+ LOCKREGION(dbenv, lt);
+ if (region->id >= DB_LOCK_MAXID)
+ region->id = 0;
+ *idp = ++region->id;
+ UNLOCKREGION(dbenv, lt);
+
+ return (0);
+}
+
+/*
+ * Vector lock routine. This function takes a set of operations
+ * and performs them all at once. In addition, lock_vec provides
+ * functionality for lock inheritance, releasing all locks for a
+ * given locker (used during transaction commit/abort), releasing
+ * all locks on a given object, and generating debugging information.
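+ *
+ * Illustrative usage sketch (field names follow the DB_LOCKREQ accesses
+ * in the code below; error handling omitted): to release one lock and
+ * request another in a single call:
+ *
+ *	DB_LOCKREQ req[2], *errp;
+ *
+ *	req[0].op = DB_LOCK_PUT;
+ *	req[0].lock = old_lock;
+ *	req[1].op = DB_LOCK_GET;
+ *	req[1].obj = &obj_dbt;
+ *	req[1].mode = DB_LOCK_WRITE;
+ *	ret = lock_vec(dbenv, locker, 0, req, 2, &errp);
+ *
+ * The requests execute in order; as the Design document notes, the set
+ * is not atomic under fine-grain locking.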
+ */
+int
+lock_vec(dbenv, locker, flags, list, nlist, elistp)
+ DB_ENV *dbenv;
+ u_int32_t locker, flags;
+ int nlist;
+ DB_LOCKREQ *list, **elistp;
+{
+ struct __db_lock *lp, *next_lock;
+ DB_LOCKER *sh_locker, *sh_parent;
+ DB_LOCKOBJ *obj, *sh_obj;
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ u_int32_t lndx, ndx;
+ int did_abort, i, ret, run_dd;
+
+#ifdef HAVE_RPC
+ if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+ return (__dbcl_lock_vec(dbenv, locker,
+ flags, list, nlist, elistp));
+#endif
+ PANIC_CHECK(dbenv);
+ ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK);
+
+ /* Validate arguments. */
+ if ((ret = __db_fchk(dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0)
+ return (ret);
+
+ lt = dbenv->lk_handle;
+ region = lt->reginfo.primary;
+
+ run_dd = 0;
+ LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle);
+ for (i = 0, ret = 0; i < nlist && ret == 0; i++)
+ switch (list[i].op) {
+ case DB_LOCK_GET:
+ ret = __lock_get_internal(dbenv->lk_handle,
+ locker, flags,
+ list[i].obj, list[i].mode, &list[i].lock);
+ break;
+ case DB_LOCK_INHERIT:
+
+ /*
+ * Get the committing locker and mark it as deleted.
+ * This allows us to traverse the locker links without
+ * worrying that someone else is deleting locks out
+ * from under us. However, if the locker doesn't
+ * exist, that just means that the child holds no
+ * locks, so inheritance is easy!
+ */
+ LOCKER_LOCK(lt, region, locker, ndx);
+ if ((ret = __lock_getlocker(lt,
+ locker, ndx, 0, &sh_locker)) != 0 ||
+ sh_locker == NULL ||
+ F_ISSET(sh_locker, DB_LOCKER_DELETED)) {
+ if (ret == 0 && sh_locker != NULL)
+ ret = EACCES;
+ __db_err(dbenv, __db_locker_invalid);
+ break;
+ }
+
+ /* Make sure we are a child transaction. */
+ if (sh_locker->parent_locker == INVALID_ROFF) {
+ __db_err(dbenv, "Not a child transaction");
+ ret = EINVAL;
+ break;
+ }
+ sh_parent = (DB_LOCKER *)
+ R_ADDR(&lt->reginfo, sh_locker->parent_locker);
+ F_SET(sh_locker, DB_LOCKER_DELETED);
+
+ /*
+ * Now, lock the parent locker; move locks from
+ * the committing list to the parent's list.
+ */
+ LOCKER_LOCK(lt, region, locker, ndx);
+ if (F_ISSET(sh_parent, DB_LOCKER_DELETED)) {
+ if (ret == 0) {
+ __db_err(dbenv,
+ "Parent locker is not valid");
+ ret = EACCES;
+ }
+ break;
+ }
+
+ for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
+ lp != NULL;
+ lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
+ SH_LIST_REMOVE(lp, locker_links, __db_lock);
+ SH_LIST_INSERT_HEAD(&sh_parent->heldby, lp,
+ locker_links, __db_lock);
+ lp->holder = sh_parent->id;
+
+ /* Get the object associated with this lock. */
+ obj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
+
+ (void)__lock_promote(lt, obj,
+ LF_ISSET(DB_LOCK_NOWAITERS));
+ }
+
+ /* Now free the original locker. */
+ ret = __lock_checklocker(lt,
+ NULL, locker, DB_LOCK_IGNOREDEL, NULL);
+ break;
+ case DB_LOCK_PUT:
+ ret =
+ __lock_put_nolock(dbenv, &list[i].lock, &run_dd, 0);
+ break;
+ case DB_LOCK_PUT_ALL:
+ /*
+ * Get the locker and mark it as deleted. This
+ * allows us to traverse the locker links without
+ * worrying that someone else is deleting locks out
+ * from under us. Since the locker may hold no
+ * locks (i.e., you could call abort before you've
+ * done any work), it's perfectly reasonable for there
+ * to be no locker; this is not an error.
+ */
+ LOCKER_LOCK(lt, region, locker, ndx);
+ if ((ret = __lock_getlocker(lt,
+ locker, ndx, 0, &sh_locker)) != 0 ||
+ sh_locker == NULL ||
+ F_ISSET(sh_locker, DB_LOCKER_DELETED))
+ /*
+ * If ret is set, then we'll generate an
+ * error. If it's not set, we have nothing
+ * to do.
+ */
+ break;
+ F_SET(sh_locker, DB_LOCKER_DELETED);
+
+ /* Now traverse the locks, releasing each one. */
+ for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
+ lp != NULL;
+ lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
+ SH_LIST_REMOVE(lp, locker_links, __db_lock);
+ sh_obj =
+ (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
+ SHOBJECT_LOCK(lt, region, sh_obj, lndx);
+ ret = __lock_put_internal(lt,
+ lp, lndx, DB_LOCK_FREE | DB_LOCK_DOALL);
+ if (ret != 0)
+ break;
+ }
+ ret = __lock_checklocker(lt,
+ NULL, locker, DB_LOCK_IGNOREDEL, NULL);
+ break;
+ case DB_LOCK_PUT_OBJ:
+ /* Remove all the locks associated with an object. */
+ OBJECT_LOCK(lt, region, list[i].obj, ndx);
+ if ((ret = __lock_getobj(lt, list[i].obj,
+ ndx, 0, &sh_obj)) != 0 || sh_obj == NULL) {
+ if (ret == 0)
+ ret = EINVAL;
+ break;
+ }
+
+ /*
+ * Go through both waiters and holders. Don't bother
+ * to run promotion, because everyone is getting
+ * released. The processes waiting will still get
+ * awakened as their waiters are released.
+ */
+ for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
+ ret == 0 && lp != NULL;
+ lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock))
+ ret = __lock_put_internal(lt,
+ lp, ndx, DB_LOCK_NOPROMOTE | DB_LOCK_DOALL);
+
+ /*
+ * On the last time around, the object will get
+			 * reclaimed by __lock_put_internal; structure the
+ * loop carefully so we do not get bitten.
+ */
+ for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
+ ret == 0 && lp != NULL;
+ lp = next_lock) {
+ next_lock = SH_TAILQ_NEXT(lp, links, __db_lock);
+ ret = __lock_put_internal(lt,
+ lp, ndx, DB_LOCK_NOPROMOTE | DB_LOCK_DOALL);
+ }
+ break;
+#ifdef DEBUG
+ case DB_LOCK_DUMP:
+ /* Find the locker. */
+ LOCKER_LOCK(lt, region, locker, ndx);
+ if ((ret = __lock_getlocker(lt,
+ locker, ndx, 0, &sh_locker)) != 0
+ || sh_locker == NULL
+ || F_ISSET(sh_locker, DB_LOCKER_DELETED))
+ break;
+
+ for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
+ lp != NULL;
+ lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) {
+ __lock_printlock(lt, lp, 1);
+ }
+ break;
+#endif
+ default:
+ __db_err(dbenv,
+ "Invalid lock operation: %d", list[i].op);
+ ret = EINVAL;
+ break;
+ }
+
+ if (ret == 0 && region->need_dd && region->detect != DB_LOCK_NORUN) {
+ run_dd = 1;
+ region->need_dd = 0;
+ }
+ UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle);
+
+ if (run_dd)
+ (void)lock_detect(dbenv, 0, region->detect, &did_abort);
+
+ if (ret != 0 && elistp != NULL)
+ *elistp = &list[i - 1];
+
+ return (ret);
+}
+
+/*
+ * Lock acquisition routines. There are two library interfaces:
+ *
+ * lock_get --
+ * original lock get interface that takes a locker id.
+ *
+ * All the work for lock_get (and for the GET option of lock_vec) is done
+ * inside of lock_get_internal.
+ */
+int
+lock_get(dbenv, locker, flags, obj, lock_mode, lock)
+ DB_ENV *dbenv;
+ u_int32_t locker, flags;
+ const DBT *obj;
+ db_lockmode_t lock_mode;
+ DB_LOCK *lock;
+{
+ int ret;
+
+#ifdef HAVE_RPC
+ if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+ return (__dbcl_lock_get(dbenv, locker,
+ flags, obj, lock_mode, lock));
+#endif
+ PANIC_CHECK(dbenv);
+ ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK);
+
+ if (IS_RECOVERING(dbenv)) {
+ lock->off = LOCK_INVALID;
+ return (0);
+ }
+
+ /* Validate arguments. */
+ if ((ret = __db_fchk(dbenv,
+ "lock_get", flags,
+ DB_LOCK_NOWAIT | DB_LOCK_UPGRADE | DB_LOCK_SWITCH)) != 0)
+ return (ret);
+
+ LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle);
+ ret = __lock_get_internal(dbenv->lk_handle,
+ locker, flags, obj, lock_mode, lock);
+ UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle);
+ return (ret);
+}
+
+static int
+__lock_get_internal(lt, locker, flags, obj, lock_mode, lock)
+ DB_LOCKTAB *lt;
+ u_int32_t locker, flags;
+ const DBT *obj;
+ db_lockmode_t lock_mode;
+ DB_LOCK *lock;
+{
+ struct __db_lock *newl, *lp;
+ DB_ENV *dbenv;
+ DB_LOCKER *sh_locker;
+ DB_LOCKOBJ *sh_obj;
+ DB_LOCKREGION *region;
+ u_int32_t locker_ndx;
+ int did_abort, freed, ihold, on_locker_list, no_dd, ret;
+
+ no_dd = ret = 0;
+ on_locker_list = 0;
+ region = lt->reginfo.primary;
+ dbenv = lt->dbenv;
+
+ /*
+ * If we are not going to reuse this lock, initialize
+ * the offset to invalid so that if we fail it
+ * will not look like a valid lock.
+ */
+ if (!LF_ISSET(DB_LOCK_UPGRADE | DB_LOCK_SWITCH))
+ lock->off = LOCK_INVALID;
+
+ /*
+ * Check that the lock mode is valid.
+ */
+ if ((u_int32_t)lock_mode >= region->nmodes) {
+ __db_err(dbenv,
+ "lock_get: invalid lock mode %lu\n", (u_long)lock_mode);
+ return (EINVAL);
+ }
+
+ /* Allocate a new lock. Optimize for the common case of a grant. */
+ region->nrequests++;
+ if ((newl = SH_TAILQ_FIRST(&region->free_locks, __db_lock)) != NULL)
+ SH_TAILQ_REMOVE(&region->free_locks, newl, links, __db_lock);
+ if (newl == NULL) {
+ __db_err(dbenv, __db_lock_err, "locks");
+ return (ENOMEM);
+ }
+ if (++region->nlocks > region->maxnlocks)
+ region->maxnlocks = region->nlocks;
+
+ /* Allocate a new object. */
+ OBJECT_LOCK(lt, region, obj, lock->ndx);
+ if ((ret = __lock_getobj(lt, obj, lock->ndx, 1, &sh_obj)) != 0)
+ goto err;
+
+ /* Get the locker, we may need it to find our parent. */
+ LOCKER_LOCK(lt, region, locker, locker_ndx);
+ if ((ret =
+ __lock_getlocker(lt, locker, locker_ndx, 1, &sh_locker)) != 0) {
+ /*
+ * XXX: Margo
+ * CLEANUP the object and the lock.
+ */
+ return (ret);
+ }
+
+ /*
+ * Now we have a lock and an object and we need to see if we should
+ * grant the lock. We use a FIFO ordering so we can only grant a
+ * new lock if it does not conflict with anyone on the holders list
+ * OR anyone on the waiters list. The reason that we don't grant if
+ * there's a conflict is that this can lead to starvation (a writer
+ * waiting on a popularly read item will never be granted). The
+ * downside of this is that a waiting reader can prevent an upgrade
+ * from reader to writer, which is not uncommon.
+ *
+ * There is one exception to the no-conflict rule. If a lock is held
+ * by the requesting locker AND the new lock does not conflict with
+ * any other holders, then we grant the lock. The most common place
+ * this happens is when the holder has a WRITE lock and a READ lock
+ * request comes in for the same locker. If we do not grant the read
+ * lock, then we guarantee deadlock.
+ *
+ * In case of conflict, we put the new lock on the end of the waiters
+ * list, unless we are upgrading in which case the locker goes on the
+ * front of the list.
+ */
+ ihold = 0;
+ lp = NULL;
+ if (LF_ISSET(DB_LOCK_SWITCH))
+ goto put_lock;
+
+ for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
+ lp != NULL;
+ lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+ if (locker == lp->holder ||
+ __lock_is_parent(lt, lp->holder, sh_locker)) {
+ if (lp->mode == lock_mode &&
+ lp->status == DB_LSTAT_HELD) {
+ if (LF_ISSET(DB_LOCK_UPGRADE))
+ goto upgrade;
+
+ /*
+ * Lock is held, so we can increment the
+ * reference count and return this lock.
+ */
+ lp->refcount++;
+ lock->off = R_OFFSET(&lt->reginfo, lp);
+ lock->gen = lp->gen;
+
+ ret = 0;
+ goto done;
+ } else
+ ihold = 1;
+ } else if (CONFLICTS(lt, region, lp->mode, lock_mode))
+ break;
+ }
+
+ /*
+ * Make the new lock point to the new object, initialize fields.
+ *
+ * This lock is not linked in anywhere, so we can muck with it
+ * without holding any mutexes.
+ */
+put_lock:
+ newl->holder = locker;
+ newl->refcount = 1;
+ newl->mode = lock_mode;
+ newl->obj = SH_PTR_TO_OFF(newl, sh_obj);
+ newl->status = DB_LSTAT_HELD;
+
+ /*
+ * If we are upgrading, then there are two scenarios. Either
+ * we had no conflicts, so we can do the upgrade. Or, there
+ * is a conflict and we should wait at the HEAD of the waiters
+ * list.
+ */
+ if (LF_ISSET(DB_LOCK_UPGRADE)) {
+ if (lp == NULL)
+ goto upgrade;
+
+ /*
+ * There was a conflict, wait. If this is the first waiter,
+ * add the object to the deadlock detector's list.
+ */
+ if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL)
+ SH_TAILQ_INSERT_HEAD(&region->dd_objs,
+ sh_obj, dd_links, __db_lockobj);
+
+ SH_TAILQ_INSERT_HEAD(&sh_obj->waiters, newl, links, __db_lock);
+ goto llist;
+ }
+
+ if (lp == NULL && !ihold)
+ for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
+ lp != NULL;
+ lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+ if (CONFLICTS(lt, region, lp->mode, lock_mode) &&
+ locker != lp->holder)
+ break;
+ }
+ if (!LF_ISSET(DB_LOCK_SWITCH) && lp == NULL)
+ SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links);
+ else if (!LF_ISSET(DB_LOCK_NOWAIT)) {
+ /*
+ * If this is the first waiter, add the object to the
+ * deadlock detector's list.
+ */
+ if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL)
+ SH_TAILQ_INSERT_HEAD(&region->dd_objs,
+ sh_obj, dd_links, __db_lockobj);
+ SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links);
+ } else {
+ ret = DB_LOCK_NOTGRANTED;
+ if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL
+ && LOCKER_FREEABLE(sh_locker))
+ __lock_freelocker( lt, region, sh_locker, locker_ndx);
+ region->nnowaits++;
+ goto err;
+ }
+
+llist:
+ /*
+ * Now, insert the lock onto its locker's list. If the locker does
+ * not currently hold any locks, there's no reason to run a deadlock
+ * detector, save that information.
+ */
+ on_locker_list = 1;
+ no_dd = sh_locker->master_locker == INVALID_ROFF
+ && SH_LIST_FIRST(&sh_locker->child_locker, __db_locker) == NULL
+ && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL;
+
+ SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock);
+
+ if (LF_ISSET(DB_LOCK_SWITCH) || lp != NULL) {
+ if (LF_ISSET(DB_LOCK_SWITCH) &&
+ (ret = __lock_put_nolock(dbenv,
+ lock, &ihold, DB_LOCK_NOWAITERS)) != 0)
+ goto err;
+ /*
+ * This is really a blocker for the thread. It should be
+ * initialized locked, so that when we try to acquire it, we
+ * block.
+ */
+ newl->status = DB_LSTAT_WAITING;
+ region->nconflicts++;
+ if (region->detect == DB_LOCK_NORUN)
+ region->need_dd = 1;
+ UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle);
+
+ /*
+ * We are about to wait; before waiting, see if the deadlock
+ * detector should be run.
+ */
+ if (region->detect != DB_LOCK_NORUN && !no_dd)
+ (void)lock_detect(dbenv, 0, region->detect, &did_abort);
+
+ MUTEX_LOCK(dbenv, &newl->mutex, dbenv->lockfhp);
+ LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle);
+
+ if (newl->status != DB_LSTAT_PENDING) {
+ (void)__lock_checklocker(lt,
+ newl, newl->holder, 0, &freed);
+ switch (newl->status) {
+ case DB_LSTAT_ABORTED:
+ on_locker_list = 0;
+ ret = DB_LOCK_DEADLOCK;
+ break;
+ case DB_LSTAT_NOGRANT:
+ ret = DB_LOCK_NOTGRANTED;
+ break;
+ default:
+ ret = EINVAL;
+ break;
+ }
+ goto err;
+ } else if (LF_ISSET(DB_LOCK_UPGRADE)) {
+ /*
+ * The lock that was just granted got put on the
+ * holders list. Since we're upgrading some other
+ * lock, we've got to remove it here.
+ */
+ SH_TAILQ_REMOVE(
+ &sh_obj->holders, newl, links, __db_lock);
+ /*
+ * Ensure that the object is not believed to be on
+ * the object's lists, if we're traversing by locker.
+ */
+ newl->links.stqe_prev = -1;
+ goto upgrade;
+ } else
+ newl->status = DB_LSTAT_HELD;
+ }
+
+ lock->off = R_OFFSET(&lt->reginfo, newl);
+ lock->gen = newl->gen;
+
+ return (0);
+
+upgrade:/*
+ * This was an upgrade, so return the new lock to the free list and
+ * upgrade the mode of the original lock.
+ */
+ ((struct __db_lock *)R_ADDR(&lt->reginfo, lock->off))->mode = lock_mode;
+
+ ret = 0;
+ /* FALLTHROUGH */
+
+done:
+err: newl->status = DB_LSTAT_FREE;
+ if (on_locker_list) {
+ SH_LIST_REMOVE(newl, locker_links, __db_lock);
+ }
+ SH_TAILQ_INSERT_HEAD(&region->free_locks, newl, links, __db_lock);
+ region->nlocks--;
+ return (ret);
+}
+
+/*
+ * Lock release routines.
+ *
+ * The user callable one is lock_put and the three we use internally are
+ * __lock_put_nolock, __lock_put_internal and __lock_downgrade.
+ */
+int
+lock_put(dbenv, lock)
+ DB_ENV *dbenv;
+ DB_LOCK *lock;
+{
+ DB_LOCKTAB *lt;
+ int ret, run_dd;
+
+#ifdef HAVE_RPC
+ if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+ return (__dbcl_lock_put(dbenv, lock));
+#endif
+ PANIC_CHECK(dbenv);
+ ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK);
+
+ if (IS_RECOVERING(dbenv))
+ return (0);
+
+ lt = dbenv->lk_handle;
+
+ LOCKREGION(dbenv, lt);
+ ret = __lock_put_nolock(dbenv, lock, &run_dd, 0);
+ UNLOCKREGION(dbenv, lt);
+
+ if (ret == 0 && run_dd)
+ (void)lock_detect(dbenv, 0,
+ ((DB_LOCKREGION *)lt->reginfo.primary)->detect, NULL);
+ return (ret);
+}
+
+static int
+__lock_put_nolock(dbenv, lock, runp, flags)
+ DB_ENV *dbenv;
+ DB_LOCK *lock;
+ int *runp;
+ int flags;
+{
+ struct __db_lock *lockp;
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ u_int32_t locker;
+ int ret;
+
+ lt = dbenv->lk_handle;
+ region = lt->reginfo.primary;
+
+ lockp = (struct __db_lock *)R_ADDR(&lt->reginfo, lock->off);
+ lock->off = LOCK_INVALID;
+ if (lock->gen != lockp->gen) {
+ __db_err(dbenv, __db_lock_invalid, "lock_put");
+ return (EACCES);
+ }
+
+ locker = lockp->holder;
+ ret = __lock_put_internal(lt,
+ lockp, lock->ndx, flags | DB_LOCK_UNLINK | DB_LOCK_FREE);
+
+ *runp = 0;
+ if (ret == 0 && region->need_dd && region->detect != DB_LOCK_NORUN) {
+ *runp = 1;
+ region->need_dd = 0;
+ }
+
+ return (ret);
+}
+
+/*
+ * __lock_downgrade --
+ * Used by the concurrent access product to downgrade write locks
+ * back to iwrite locks.
+ *
+ * PUBLIC: int __lock_downgrade __P((DB_ENV *,
+ * PUBLIC: DB_LOCK *, db_lockmode_t, u_int32_t));
+ */
+int
+__lock_downgrade(dbenv, lock, new_mode, flags)
+ DB_ENV *dbenv;
+ DB_LOCK *lock;
+ db_lockmode_t new_mode;
+ u_int32_t flags;
+{
+ struct __db_lock *lockp;
+ DB_LOCKOBJ *obj;
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ int ret;
+
+ COMPQUIET(flags, 0);
+
+ PANIC_CHECK(dbenv);
+
+ lt = dbenv->lk_handle;
+ region = lt->reginfo.primary;
+
+ LOCKREGION(dbenv, lt);
+
+ lockp = (struct __db_lock *)R_ADDR(&lt->reginfo, lock->off);
+ if (lock->gen != lockp->gen) {
+ __db_err(dbenv, __db_lock_invalid, "lock_downgrade");
+ ret = EACCES;
+ goto out;
+ }
+
+ lockp->mode = new_mode;
+
+ /* Get the object associated with this lock. */
+ obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
+ (void)__lock_promote(lt, obj, LF_ISSET(DB_LOCK_NOWAITERS));
+
+	++region->nreleases;
+	ret = 0;
+out:	UNLOCKREGION(dbenv, lt);
+
+	return (ret);
+}
+
+static int
+__lock_put_internal(lt, lockp, obj_ndx, flags)
+ DB_LOCKTAB *lt;
+ struct __db_lock *lockp;
+ u_int32_t obj_ndx;
+ u_int32_t flags;
+{
+ DB_LOCKOBJ *sh_obj;
+ DB_LOCKREGION *region;
+ int no_reclaim, ret, state_changed;
+
+ region = lt->reginfo.primary;
+ no_reclaim = ret = state_changed = 0;
+
+ if (!OBJ_LINKS_VALID(lockp)) {
+ /*
+ * Someone removed this lock while we were doing a release
+ * by locker id. We are trying to free this lock, but it's
+ * already been done; all we need to do is return it to the
+ * free list.
+ */
+ lockp->status = DB_LSTAT_FREE;
+ SH_TAILQ_INSERT_HEAD(
+ &region->free_locks, lockp, links, __db_lock);
+ region->nlocks--;
+ return (0);
+ }
+
+ if (LF_ISSET(DB_LOCK_DOALL))
+ region->nreleases += lockp->refcount;
+ else
+ region->nreleases++;
+
+ if (!LF_ISSET(DB_LOCK_DOALL) && lockp->refcount > 1) {
+ lockp->refcount--;
+ return (0);
+ }
+
+ /* Increment generation number. */
+ lockp->gen++;
+
+ /* Get the object associated with this lock. */
+ sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
+
+ /* Remove this lock from its holders/waitlist. */
+ if (lockp->status != DB_LSTAT_HELD)
+ __lock_remove_waiter(lt->dbenv, sh_obj, lockp, DB_LSTAT_FREE);
+ else {
+ SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock);
+ lockp->links.stqe_prev = -1;
+ }
+
+ if (LF_ISSET(DB_LOCK_NOPROMOTE))
+ state_changed = 0;
+ else
+ state_changed =
+ __lock_promote(lt, sh_obj, LF_ISSET(DB_LOCK_NOWAITERS));
+
+ if (LF_ISSET(DB_LOCK_UNLINK))
+ ret = __lock_checklocker(lt, lockp, lockp->holder, flags, NULL);
+
+ /* Check if object should be reclaimed. */
+ if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL
+ && SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) {
+ HASHREMOVE_EL(lt->obj_tab,
+ obj_ndx, __db_lockobj, links, sh_obj);
+ if (sh_obj->lockobj.size > sizeof(sh_obj->objdata))
+ __db_shalloc_free(lt->reginfo.addr,
+ SH_DBT_PTR(&sh_obj->lockobj));
+ SH_TAILQ_INSERT_HEAD(
+ &region->free_objs, sh_obj, links, __db_lockobj);
+ region->nobjects--;
+ state_changed = 1;
+ }
+
+ /* Free lock. */
+ if (!LF_ISSET(DB_LOCK_UNLINK) && LF_ISSET(DB_LOCK_FREE)) {
+ lockp->status = DB_LSTAT_FREE;
+ SH_TAILQ_INSERT_HEAD(
+ &region->free_locks, lockp, links, __db_lock);
+ region->nlocks--;
+ }
+
+ /*
+	 * If we did not promote anyone, we need to run the deadlock
+ * detector again.
+ */
+ if (state_changed == 0)
+ region->need_dd = 1;
+
+ return (ret);
+}
+
+/*
+ * Utility functions; listed alphabetically.
+ */
+
+/*
+ * __lock_checklocker --
+ *	If a locker has no more locks, then we can free the locker.
+ *	Return a boolean indicating whether we freed the locker or not.
+ *
+ * Must be called without the locker's lock set.
+ */
+static int
+__lock_checklocker(lt, lockp, locker, flags, freed)
+ DB_LOCKTAB *lt;
+ struct __db_lock *lockp;
+ u_int32_t locker, flags;
+ int *freed;
+{
+ DB_ENV *dbenv;
+ DB_LOCKER *sh_locker;
+ DB_LOCKREGION *region;
+ u_int32_t indx;
+ int ret;
+
+ dbenv = lt->dbenv;
+ region = lt->reginfo.primary;
+ ret = 0;
+
+ if (freed != NULL)
+ *freed = 0;
+
+ LOCKER_LOCK(lt, region, locker, indx);
+
+ /* If the locker's list is NULL, free up the locker. */
+ if ((ret = __lock_getlocker(lt,
+ locker, indx, 0, &sh_locker)) != 0 || sh_locker == NULL) {
+ if (ret == 0)
+ ret = EACCES;
+ __db_err(lt->dbenv, __db_locker_invalid);
+ goto freelock;
+ }
+
+ if (F_ISSET(sh_locker, DB_LOCKER_DELETED)) {
+ LF_CLR(DB_LOCK_FREE);
+ if (!LF_ISSET(DB_LOCK_IGNOREDEL))
+ goto freelock;
+ }
+
+ if (LF_ISSET(DB_LOCK_UNLINK))
+ SH_LIST_REMOVE(lockp, locker_links, __db_lock);
+
+ if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL
+ && LOCKER_FREEABLE(sh_locker)) {
+ __lock_freelocker( lt, region, sh_locker, indx);
+ if (freed != NULL)
+ *freed = 1;
+ }
+
+freelock:
+ if (LF_ISSET(DB_LOCK_FREE)) {
+ lockp->status = DB_LSTAT_FREE;
+ SH_TAILQ_INSERT_HEAD(
+ &region->free_locks, lockp, links, __db_lock);
+ region->nlocks--;
+ }
+
+ return (ret);
+}
+
+/*
+ * __lock_addfamilylocker
+ * Put a locker entry in for a child transaction.
+ *
+ * PUBLIC: int __lock_addfamilylocker __P((DB_ENV *, u_int32_t, u_int32_t));
+ */
+int
+__lock_addfamilylocker(dbenv, pid, id)
+ DB_ENV *dbenv;
+ u_int32_t pid, id;
+{
+ DB_LOCKER *lockerp, *mlockerp;
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ u_int32_t ndx;
+ int ret;
+
+ lt = dbenv->lk_handle;
+ region = lt->reginfo.primary;
+ LOCKREGION(dbenv, lt);
+
+ /* get/create the parent locker info */
+ LOCKER_LOCK(lt, region, pid, ndx);
+ if ((ret = __lock_getlocker(dbenv->lk_handle,
+ pid, ndx, 1, &mlockerp)) != 0)
+ goto err;
+
+ /*
+ * We assume that only one thread can manipulate
+ * a single transaction family.
+ * Therefore the master locker cannot go away while
+ * we manipulate it, nor can another child in the
+ * family be created at the same time.
+ */
+ LOCKER_LOCK(lt, region, id, ndx);
+ if ((ret = __lock_getlocker(dbenv->lk_handle,
+ id, ndx, 1, &lockerp)) != 0)
+ goto err;
+
+ /* Point to our parent. */
+ lockerp->parent_locker = R_OFFSET(&lt->reginfo, mlockerp);
+
+ /* See if this locker is the family master. */
+ if (mlockerp->master_locker == INVALID_ROFF)
+ lockerp->master_locker = R_OFFSET(&lt->reginfo, mlockerp);
+ else {
+ lockerp->master_locker = mlockerp->master_locker;
+ mlockerp = R_ADDR(&lt->reginfo, mlockerp->master_locker);
+ }
+
+ /*
+ * Link the child at the head of the master's list.
+	 * The guess is that when looking for deadlock
+	 * the most recent child is the one that's blocked.
+ */
+ SH_LIST_INSERT_HEAD(
+ &mlockerp->child_locker, lockerp, child_link, __db_locker);
+
+err:
+ UNLOCKREGION(dbenv, lt);
+
+ return (ret);
+}
+
+/*
+ * __lock_freefamilylocker
+ * Remove a locker from the hash table and its family.
+ *
+ * This must be called without the locker bucket locked.
+ *
+ * PUBLIC: int __lock_freefamilylocker __P((DB_LOCKTAB *, u_int32_t));
+ */
+int
+__lock_freefamilylocker(lt, locker)
+ DB_LOCKTAB *lt;
+ u_int32_t locker;
+{
+ DB_ENV *dbenv;
+ DB_LOCKER *sh_locker;
+ DB_LOCKREGION *region;
+ u_int32_t indx;
+ int ret;
+
+ dbenv = lt->dbenv;
+ region = lt->reginfo.primary;
+
+ LOCKREGION(dbenv, lt);
+ LOCKER_LOCK(lt, region, locker, indx);
+
+ if ((ret = __lock_getlocker(lt,
+ locker, indx, 0, &sh_locker)) != 0 || sh_locker == NULL) {
+ if (ret == 0)
+ ret = EACCES;
+ goto freelock;
+ }
+ if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) != NULL) {
+ ret = EINVAL;
+ __db_err(dbenv, "Freeing locker with locks");
+ goto freelock;
+ }
+
+ /* If this is part of a family, we must fix up its links. */
+ if (sh_locker->master_locker != INVALID_ROFF)
+ SH_LIST_REMOVE(sh_locker, child_link, __db_locker);
+
+ __lock_freelocker(lt, region, sh_locker, indx);
+
+freelock:
+ UNLOCKREGION(dbenv, lt);
+ return (ret);
+}
+
+/*
+ * __lock_freelocker
+ * common code for deleting a locker.
+ *
+ * This must be called with the locker bucket locked.
+ *
+ * PUBLIC: void __lock_freelocker __P((DB_LOCKTAB *,
+ * PUBLIC: DB_LOCKREGION *, DB_LOCKER *, u_int32_t));
+ */
+void
+__lock_freelocker(lt, region, sh_locker, indx)
+ DB_LOCKTAB *lt;
+ DB_LOCKREGION *region;
+ DB_LOCKER *sh_locker;
+ u_int32_t indx;
+
+{
+ HASHREMOVE_EL(
+ lt->locker_tab, indx, __db_locker, links, sh_locker);
+ SH_TAILQ_INSERT_HEAD(
+ &region->free_lockers, sh_locker, links, __db_locker);
+ region->nlockers--;
+}
+
+/*
+ * __lock_getlocker --
+ * Get a locker in the locker hash table. The create parameter
+ * indicates if the locker should be created if it doesn't exist in
+ * the table.
+ *
+ * This must be called with the locker bucket locked.
+ *
+ * PUBLIC: int __lock_getlocker __P((DB_LOCKTAB *,
+ * PUBLIC: u_int32_t, u_int32_t, int, DB_LOCKER **));
+ */
+int
+__lock_getlocker(lt, locker, indx, create, retp)
+ DB_LOCKTAB *lt;
+ u_int32_t locker, indx;
+ int create;
+ DB_LOCKER **retp;
+{
+ DB_ENV *dbenv;
+ DB_LOCKER *sh_locker;
+ DB_LOCKREGION *region;
+
+ dbenv = lt->dbenv;
+ region = lt->reginfo.primary;
+
+ HASHLOOKUP(lt->locker_tab,
+ indx, __db_locker, links, locker, sh_locker, __lock_locker_cmp);
+
+ /*
+ * If we found the locker, then we can just return it. If
+ * we didn't find the locker, then we need to create it.
+ */
+ if (sh_locker == NULL && create) {
+ /* Create new locker and then insert it into hash table. */
+ if ((sh_locker = SH_TAILQ_FIRST(
+ &region->free_lockers, __db_locker)) == NULL) {
+ __db_err(lt->dbenv, __db_lock_err, "locker entries");
+ return (ENOMEM);
+ }
+ SH_TAILQ_REMOVE(
+ &region->free_lockers, sh_locker, links, __db_locker);
+ if (++region->nlockers > region->maxnlockers)
+ region->maxnlockers = region->nlockers;
+
+ sh_locker->id = locker;
+ sh_locker->dd_id = 0;
+ sh_locker->master_locker = INVALID_ROFF;
+ sh_locker->parent_locker = INVALID_ROFF;
+ SH_LIST_INIT(&sh_locker->child_locker);
+ sh_locker->flags = 0;
+ SH_LIST_INIT(&sh_locker->heldby);
+
+ HASHINSERT(lt->locker_tab, indx, __db_locker, links, sh_locker);
+ }
+
+ *retp = sh_locker;
+ return (0);
+}
+
+/*
+ * __lock_getobj --
+ * Get an object in the object hash table. The create parameter
+ * indicates if the object should be created if it doesn't exist in
+ * the table.
+ *
+ * This must be called with the object bucket locked.
+ *
+ * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *,
+ * PUBLIC: const DBT *, u_int32_t, int, DB_LOCKOBJ **));
+ */
+int
+__lock_getobj(lt, obj, ndx, create, retp)
+ DB_LOCKTAB *lt;
+ const DBT *obj;
+ u_int32_t ndx;
+ int create;
+ DB_LOCKOBJ **retp;
+{
+ DB_ENV *dbenv;
+ DB_LOCKOBJ *sh_obj;
+ DB_LOCKREGION *region;
+ int ret;
+ void *p;
+
+ dbenv = lt->dbenv;
+ region = lt->reginfo.primary;
+
+ /* Look up the object in the hash table. */
+ HASHLOOKUP(lt->obj_tab,
+ ndx, __db_lockobj, links, obj, sh_obj, __lock_cmp);
+
+ /*
+ * If we found the object, then we can just return it. If
+ * we didn't find the object, then we need to create it.
+ */
+ if (sh_obj == NULL && create) {
+ /* Create new object and then insert it into hash table. */
+ if ((sh_obj =
+ SH_TAILQ_FIRST(&region->free_objs, __db_lockobj)) == NULL) {
+ __db_err(lt->dbenv, __db_lock_err, "object entries");
+ ret = ENOMEM;
+ goto err;
+ }
+
+ /*
+ * If we can fit this object in the structure, do so instead
+ * of shalloc-ing space for it.
+ */
+ if (obj->size <= sizeof(sh_obj->objdata))
+ p = sh_obj->objdata;
+ else if ((ret = __db_shalloc(
+ lt->reginfo.addr, obj->size, 0, &p)) != 0) {
+ __db_err(dbenv, "No space for lock object storage");
+ goto err;
+ }
+
+ memcpy(p, obj->data, obj->size);
+
+ SH_TAILQ_REMOVE(
+ &region->free_objs, sh_obj, links, __db_lockobj);
+ if (++region->nobjects > region->maxnobjects)
+ region->maxnobjects = region->nobjects;
+
+ SH_TAILQ_INIT(&sh_obj->waiters);
+ SH_TAILQ_INIT(&sh_obj->holders);
+ sh_obj->lockobj.size = obj->size;
+ sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p);
+
+ HASHINSERT(lt->obj_tab, ndx, __db_lockobj, links, sh_obj);
+ }
+
+ *retp = sh_obj;
+ return (0);
+
+err: return (ret);
+}
+
+/*
+ * __lock_is_parent --
+ * Given a locker and a transaction, return 1 if the locker is
+ *	an ancestor of the designated transaction. This is used to determine
+ * if we should grant locks that appear to conflict, but don't because
+ * the lock is already held by an ancestor.
+ */
+static int
+__lock_is_parent(lt, locker, sh_locker)
+ DB_LOCKTAB *lt;
+ u_int32_t locker;
+ DB_LOCKER *sh_locker;
+{
+ DB_LOCKER *parent;
+
+ parent = sh_locker;
+ while (parent->parent_locker != INVALID_ROFF) {
+ parent = (DB_LOCKER *)
+ R_ADDR(&lt->reginfo, parent->parent_locker);
+ if (parent->id == locker)
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
+ * __lock_promote --
+ *
+ * Look through the waiters and holders lists and decide which (if any)
+ * locks can be promoted. Promote any that are eligible.
+ *
+ * PUBLIC: int __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *, int));
+ */
+int
+__lock_promote(lt, obj, not_waiters)
+ DB_LOCKTAB *lt;
+ DB_LOCKOBJ *obj;
+ int not_waiters;
+{
+ struct __db_lock *lp_w, *lp_h, *next_waiter;
+ DB_LOCKER *sh_locker;
+ DB_LOCKREGION *region;
+ u_int32_t locker_ndx;
+ int had_waiters, state_changed;
+
+ region = lt->reginfo.primary;
+ had_waiters = 0;
+
+ /*
+ * We need to do lock promotion. We also need to determine if we're
+ * going to need to run the deadlock detector again. If we release
+ * locks, and there are waiters, but no one gets promoted, then we
+ * haven't fundamentally changed the lockmgr state, so we may still
+ * have a deadlock and we have to run again. However, if there were
+ * no waiters, or we actually promoted someone, then we are OK and we
+ * don't have to run it immediately.
+ *
+ * During promotion, we look for state changes so we can return this
+ * information to the caller.
+ */
+
+ for (lp_w = SH_TAILQ_FIRST(&obj->waiters, __db_lock),
+ state_changed = lp_w == NULL;
+ lp_w != NULL;
+ lp_w = next_waiter) {
+ had_waiters = 1;
+ next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock);
+ /* Are we switching locks? */
+ if (not_waiters && lp_w->mode == DB_LOCK_WAIT)
+ continue;
+ for (lp_h = SH_TAILQ_FIRST(&obj->holders, __db_lock);
+ lp_h != NULL;
+ lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) {
+ if (lp_h->holder != lp_w->holder &&
+ CONFLICTS(lt, region, lp_h->mode, lp_w->mode)) {
+
+ LOCKER_LOCK(lt, region, lp_w->holder, locker_ndx);
+ if ((__lock_getlocker(lt, lp_w->holder,
+ locker_ndx, 0, &sh_locker)) != 0) {
+ DB_ASSERT(0);
+ break;
+ }
+ if (!__lock_is_parent(lt,
+ lp_h->holder, sh_locker))
+ break;
+ }
+ }
+ if (lp_h != NULL) /* Found a conflict. */
+ break;
+
+ /* No conflict, promote the waiting lock. */
+ SH_TAILQ_REMOVE(&obj->waiters, lp_w, links, __db_lock);
+ lp_w->status = DB_LSTAT_PENDING;
+ SH_TAILQ_INSERT_TAIL(&obj->holders, lp_w, links);
+
+ /* Wake up waiter. */
+ MUTEX_UNLOCK(lt->dbenv, &lp_w->mutex);
+ state_changed = 1;
+ }
+
+ /*
+ * If this object had waiters and doesn't any more, then we need
+ * to remove it from the dd_obj list.
+ */
+ if (had_waiters && SH_TAILQ_FIRST(&obj->waiters, __db_lock) == NULL)
+ SH_TAILQ_REMOVE(&region->dd_objs, obj, dd_links, __db_lockobj);
+ return (state_changed);
+}
+
+/*
+ * __lock_remove_waiter --
+ * Any lock on the waitlist has a process waiting for it. Therefore,
+ * we can't return the lock to the freelist immediately. Instead, we can
+ * remove the lock from the list of waiters, set the status field of the
+ * lock, and then let the process waking up return the lock to the
+ * free list.
+ *
+ * This must be called with the Object bucket locked.
+ */
+static void
+__lock_remove_waiter(dbenv, sh_obj, lockp, status)
+ DB_ENV *dbenv;
+ DB_LOCKOBJ *sh_obj;
+ struct __db_lock *lockp;
+ db_status_t status;
+{
+ int do_wakeup;
+
+ do_wakeup = lockp->status == DB_LSTAT_WAITING;
+
+ SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
+ lockp->links.stqe_prev = -1;
+ lockp->status = status;
+
+ /*
+ * Wake whoever is waiting on this lock.
+ *
+	 * The MUTEX_UNLOCK macro normally resolves to a single argument;
+	 * keep the compiler quiet.
+ */
+ if (do_wakeup)
+ MUTEX_UNLOCK(dbenv, &lockp->mutex);
+}
+
+/*
+ * __lock_printlock --
+ *
+ * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
+ */
+void
+__lock_printlock(lt, lp, ispgno)
+ DB_LOCKTAB *lt;
+ struct __db_lock *lp;
+ int ispgno;
+{
+ DB_LOCKOBJ *lockobj;
+ db_pgno_t pgno;
+ u_int32_t *fidp;
+ u_int8_t *ptr, type;
+ const char *mode, *status;
+
+ switch (lp->mode) {
+ case DB_LOCK_IREAD:
+ mode = "IREAD";
+ break;
+ case DB_LOCK_IWR:
+ mode = "IWR";
+ break;
+ case DB_LOCK_IWRITE:
+ mode = "IWRITE";
+ break;
+ case DB_LOCK_NG:
+ mode = "NG";
+ break;
+ case DB_LOCK_READ:
+ mode = "READ";
+ break;
+ case DB_LOCK_WRITE:
+ mode = "WRITE";
+ break;
+ case DB_LOCK_WAIT:
+ mode = "WAIT";
+ break;
+ default:
+ mode = "UNKNOWN";
+ break;
+ }
+ switch (lp->status) {
+ case DB_LSTAT_ABORTED:
+ status = "ABORT";
+ break;
+ case DB_LSTAT_ERR:
+ status = "ERROR";
+ break;
+ case DB_LSTAT_FREE:
+ status = "FREE";
+ break;
+ case DB_LSTAT_HELD:
+ status = "HELD";
+ break;
+ case DB_LSTAT_NOGRANT:
+ status = "NONE";
+ break;
+ case DB_LSTAT_WAITING:
+ status = "WAIT";
+ break;
+ case DB_LSTAT_PENDING:
+ status = "PENDING";
+ break;
+ default:
+ status = "UNKNOWN";
+ break;
+ }
+ printf("\t%lx\t%s\t%lu\t%s\t",
+ (u_long)lp->holder, mode, (u_long)lp->refcount, status);
+
+ lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
+ ptr = SH_DBT_PTR(&lockobj->lockobj);
+ if (ispgno && lockobj->lockobj.size == sizeof(struct __db_ilock)) {
+ /* Assume this is a DBT lock. */
+ memcpy(&pgno, ptr, sizeof(db_pgno_t));
+ fidp = (u_int32_t *)(ptr + sizeof(db_pgno_t));
+ type = *(u_int8_t *)(ptr + sizeof(db_pgno_t) + DB_FILE_ID_LEN);
+ printf("%s %lu (%lu %lu %lu %lu %lu)\n",
+ type == DB_PAGE_LOCK ? "page" : "record",
+ (u_long)pgno,
+ (u_long)fidp[0], (u_long)fidp[1], (u_long)fidp[2],
+ (u_long)fidp[3], (u_long)fidp[4]);
+ } else {
+ printf("0x%lx ", (u_long)R_OFFSET(&lt->reginfo, lockobj));
+ __db_pr(ptr, lockobj->lockobj.size);
+ printf("\n");
+ }
+}
diff --git a/bdb/lock/lock_conflict.c b/bdb/lock/lock_conflict.c
new file mode 100644
index 00000000000..2d7945fe201
--- /dev/null
+++ b/bdb/lock/lock_conflict.c
@@ -0,0 +1,34 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: lock_conflict.c,v 11.6 2000/12/12 17:38:13 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * The conflict arrays are set up such that the row is the lock you
+ * are holding and the column is the lock that is desired.
+ */
+
+const u_int8_t db_riw_conflicts[] = {
+ /* N S X WT IX IS SIX */
+ /* N */ 0, 0, 0, 0, 0, 0, 0,
+ /* S */ 0, 0, 1, 0, 1, 0, 1,
+ /* X */ 0, 1, 1, 1, 1, 1, 1,
+ /* WT */ 0, 0, 0, 0, 0, 0, 0,
+ /* IX */ 0, 1, 1, 0, 0, 0, 0,
+ /* IS */ 0, 0, 1, 0, 0, 0, 0,
+ /* SIX */ 0, 1, 1, 0, 0, 0, 0
+};
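+
+/*
+ * Illustrative note: callers index this table as a two-dimensional
+ * matrix via the CONFLICTS macro, roughly
+ * db_riw_conflicts[held_mode * nmodes + requested_mode].  For example,
+ * the row-S/column-X entry is 1, so a held shared (S) lock conflicts
+ * with an incoming exclusive (X) request, while S against S (0) does
+ * not.
+ */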
diff --git a/bdb/lock/lock_deadlock.c b/bdb/lock/lock_deadlock.c
new file mode 100644
index 00000000000..1f37db3890e
--- /dev/null
+++ b/bdb/lock/lock_deadlock.c
@@ -0,0 +1,637 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: lock_deadlock.c,v 11.23 2000/12/08 20:15:31 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "db_shash.h"
+#include "lock.h"
+#include "txn.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+#define ISSET_MAP(M, N) ((M)[(N) / 32] & (1 << (N) % 32))
+
+#define CLEAR_MAP(M, N) { \
+ u_int32_t __i; \
+ for (__i = 0; __i < (N); __i++) \
+ (M)[__i] = 0; \
+}
+
+#define SET_MAP(M, B) ((M)[(B) / 32] |= (1 << ((B) % 32)))
+#define CLR_MAP(M, B) ((M)[(B) / 32] &= ~(1 << ((B) % 32)))
+
+#define OR_MAP(D, S, N) { \
+ u_int32_t __i; \
+ for (__i = 0; __i < (N); __i++) \
+ D[__i] |= S[__i]; \
+}
+#define BAD_KILLID 0xffffffff
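+
+/*
+ * The waits-for graph is a square bit matrix: one row per locker, each
+ * row nentries 32-bit words wide (nentries = ALIGN(nlockers, 32) / 32).
+ * Bit j set in row i means locker i is waiting on locker j, so, for
+ * example, ISSET_MAP(bitmap + i * nentries, j) tests the edge i -> j.
+ */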
+
+typedef struct {
+ int valid;
+ u_int32_t id;
+ u_int32_t last_lock;
+ u_int32_t last_locker_id;
+ db_pgno_t pgno;
+} locker_info;
+
+static int __dd_abort __P((DB_ENV *, locker_info *));
+static int __dd_build
+ __P((DB_ENV *, u_int32_t **, u_int32_t *, locker_info **));
+static int __dd_find
+ __P((DB_ENV *,u_int32_t *, locker_info *, u_int32_t, u_int32_t ***));
+
+#ifdef DIAGNOSTIC
+static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t));
+#endif
+
+int
+lock_detect(dbenv, flags, atype, abortp)
+ DB_ENV *dbenv;
+ u_int32_t flags, atype;
+ int *abortp;
+{
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ locker_info *idmap;
+ u_int32_t *bitmap, **deadp, **free_me, i, killid, nentries, nlockers;
+ int do_pass, ret;
+
+#ifdef HAVE_RPC
+ if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+ return (__dbcl_lock_detect(dbenv, flags, atype, abortp));
+#endif
+
+ PANIC_CHECK(dbenv);
+ ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK);
+
+ lt = dbenv->lk_handle;
+ if (abortp != NULL)
+ *abortp = 0;
+
+ /* Validate arguments. */
+ if ((ret =
+ __db_fchk(dbenv, "lock_detect", flags, DB_LOCK_CONFLICT)) != 0)
+ return (ret);
+
+ /* Check if a detector run is necessary. */
+ LOCKREGION(dbenv, lt);
+ if (LF_ISSET(DB_LOCK_CONFLICT)) {
+ /* Make a pass every time a lock waits. */
+ region = lt->reginfo.primary;
+ do_pass = region->need_dd != 0;
+
+ if (!do_pass) {
+ UNLOCKREGION(dbenv, lt);
+ return (0);
+ }
+ }
+
+ /* Build the waits-for bitmap. */
+ ret = __dd_build(dbenv, &bitmap, &nlockers, &idmap);
+ UNLOCKREGION(dbenv, lt);
+ if (ret != 0)
+ return (ret);
+
+ if (nlockers == 0)
+ return (0);
+#ifdef DIAGNOSTIC
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_WAITSFOR))
+ __dd_debug(dbenv, idmap, bitmap, nlockers);
+#endif
+ /* Find a deadlock. */
+ if ((ret = __dd_find(dbenv, bitmap, idmap, nlockers, &deadp)) != 0)
+ return (ret);
+
+ nentries = ALIGN(nlockers, 32) / 32;
+ killid = BAD_KILLID;
+ free_me = deadp;
+ for (; *deadp != NULL; deadp++) {
+ if (abortp != NULL)
+ ++*abortp;
+ switch (atype) { /* Kill someone. */
+ case DB_LOCK_OLDEST:
+ /*
+ * Find the first bit set in the current
+ * array and then look for a lower tid in
+ * the array.
+ */
+ for (i = 0; i < nlockers; i++)
+ if (ISSET_MAP(*deadp, i)) {
+ killid = i;
+ break;
+
+ }
+ /*
+ * It's conceivable that under XA, the locker could
+ * have gone away.
+ */
+ if (killid == BAD_KILLID)
+ break;
+
+ /*
+ * The oldest transaction has the lowest
+ * transaction id.
+ */
+ for (i = killid + 1; i < nlockers; i++)
+ if (ISSET_MAP(*deadp, i) &&
+ idmap[i].id < idmap[killid].id)
+ killid = i;
+ break;
+ case DB_LOCK_DEFAULT:
+ case DB_LOCK_RANDOM:
+ /*
+ * We are trying to calculate the id of the
+ * locker whose entry is indicated by deadlock.
+ */
+ killid = (*deadp - bitmap) / nentries;
+ break;
+ case DB_LOCK_YOUNGEST:
+ /*
+ * Find the first bit set in the current
+			 * array and then look for a higher tid in
+ * the array.
+ */
+ for (i = 0; i < nlockers; i++)
+ if (ISSET_MAP(*deadp, i)) {
+ killid = i;
+ break;
+ }
+
+ /*
+ * It's conceivable that under XA, the locker could
+ * have gone away.
+ */
+ if (killid == BAD_KILLID)
+ break;
+
+ /*
+ * The youngest transaction has the highest
+ * transaction id.
+ */
+ for (i = killid + 1; i < nlockers; i++)
+ if (ISSET_MAP(*deadp, i) &&
+ idmap[i].id > idmap[killid].id)
+ killid = i;
+ break;
+ default:
+ killid = BAD_KILLID;
+ ret = EINVAL;
+ }
+
+ if (killid == BAD_KILLID)
+ continue;
+
+ /* Kill the locker with lockid idmap[killid]. */
+ if ((ret = __dd_abort(dbenv, &idmap[killid])) != 0) {
+ /*
+ * It's possible that the lock was already aborted;
+ * this isn't necessarily a problem, so do not treat
+ * it as an error.
+ */
+ if (ret == DB_ALREADY_ABORTED)
+ ret = 0;
+ else
+ __db_err(dbenv,
+ "warning: unable to abort locker %lx",
+ (u_long)idmap[killid].id);
+ } else if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK))
+ __db_err(dbenv,
+ "Aborting locker %lx", (u_long)idmap[killid].id);
+ }
+ __os_free(free_me, 0);
+ __os_free(bitmap, 0);
+ __os_free(idmap, 0);
+
+ return (ret);
+}
+
+/*
+ * ========================================================================
+ * Utilities
+ */
+
+# define DD_INVALID_ID ((u_int32_t) -1)
+
+static int
+__dd_build(dbenv, bmp, nlockers, idmap)
+ DB_ENV *dbenv;
+ u_int32_t **bmp, *nlockers;
+ locker_info **idmap;
+{
+ struct __db_lock *lp;
+ DB_LOCKER *lip, *lockerp, *child;
+ DB_LOCKOBJ *op, *lo;
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ locker_info *id_array;
+ u_int32_t *bitmap, count, dd, *entryp, i, id, ndx, nentries, *tmpmap;
+ u_int8_t *pptr;
+ int is_first, ret;
+
+ lt = dbenv->lk_handle;
+ region = lt->reginfo.primary;
+
+ /*
+ * We'll check how many lockers there are, add a few more in for
+ * good measure and then allocate all the structures. Then we'll
+ * verify that we have enough room when we go back in and get the
+ * mutex the second time.
+ */
+retry: count = region->nlockers;
+ region->need_dd = 0;
+
+ if (count == 0) {
+ *nlockers = 0;
+ return (0);
+ }
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK))
+ __db_err(dbenv, "%lu lockers", (u_long)count);
+
+ count += 40;
+ nentries = ALIGN(count, 32) / 32;
+
+ /*
+ * Allocate enough space for a count by count bitmap matrix.
+ *
+ * XXX
+ * We can probably save the mallocs between iterations by just
+ * reallocing if necessary when count grows too much.
+ */
+ if ((ret = __os_calloc(dbenv, (size_t)count,
+ sizeof(u_int32_t) * nentries, &bitmap)) != 0)
+ return (ret);
+
+ if ((ret = __os_calloc(dbenv,
+ sizeof(u_int32_t), nentries, &tmpmap)) != 0) {
+ __os_free(bitmap, sizeof(u_int32_t) * nentries);
+ return (ret);
+ }
+
+ if ((ret = __os_calloc(dbenv,
+ (size_t)count, sizeof(locker_info), &id_array)) != 0) {
+ __os_free(bitmap, count * sizeof(u_int32_t) * nentries);
+ __os_free(tmpmap, sizeof(u_int32_t) * nentries);
+ return (ret);
+ }
+
+ /*
+ * Now go back in and actually fill in the matrix.
+ */
+ if (region->nlockers > count) {
+ __os_free(bitmap, count * sizeof(u_int32_t) * nentries);
+ __os_free(tmpmap, sizeof(u_int32_t) * nentries);
+ __os_free(id_array, count * sizeof(locker_info));
+ goto retry;
+ }
+
+ /*
+ * First we go through and assign each locker a deadlock detector id.
+ */
+ for (id = 0, i = 0; i < region->locker_t_size; i++) {
+ for (lip = SH_TAILQ_FIRST(&lt->locker_tab[i], __db_locker);
+ lip != NULL; lip = SH_TAILQ_NEXT(lip, links, __db_locker))
+ if (lip->master_locker == INVALID_ROFF) {
+ lip->dd_id = id++;
+ id_array[lip->dd_id].id = lip->id;
+ } else
+ lip->dd_id = DD_INVALID_ID;
+ }
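+
+ /*
+ * Note that only master lockers receive dd ids: a master with two
+ * child transactions maps three DB_LOCKER entries onto a single
+ * row/column of the waitsfor matrix, and the children's waits and
+ * holds are attributed to the master below via master_locker.
+ */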
+
+ /*
+ * We need only consider objects that have waiters, so we use
+ * the list of objects with waiters (dd_objs) instead of traversing
+ * the entire hash table. For each object, we traverse the waiters
+ * list and add an entry in the waitsfor matrix for each waiter/holder
+ * combination.
+ */
+ for (op = SH_TAILQ_FIRST(&region->dd_objs, __db_lockobj);
+ op != NULL; op = SH_TAILQ_NEXT(op, dd_links, __db_lockobj)) {
+ CLEAR_MAP(tmpmap, nentries);
+
+ /*
+ * First we go through and create a bit map that
+ * represents all the holders of this object.
+ */
+ for (lp = SH_TAILQ_FIRST(&op->holders, __db_lock);
+ lp != NULL;
+ lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+ LOCKER_LOCK(lt, region, lp->holder, ndx);
+ if ((ret = __lock_getlocker(lt,
+ lp->holder, ndx, 0, &lockerp)) != 0)
+ continue;
+ if (lockerp->dd_id == DD_INVALID_ID)
+ dd = ((DB_LOCKER *)
+ R_ADDR(&lt->reginfo,
+ lockerp->master_locker))->dd_id;
+ else
+ dd = lockerp->dd_id;
+ id_array[dd].valid = 1;
+
+ /*
+ * If the holder has already been aborted, then
+ * we should ignore it for now.
+ */
+ if (lp->status == DB_LSTAT_HELD)
+ SET_MAP(tmpmap, dd);
+ }
+
+ /*
+ * Next, for each waiter, we OR the map of holders we set
+ * up above into its row of the matrix.
+ */
+ for (is_first = 1,
+ lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
+ lp != NULL;
+ is_first = 0,
+ lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+ LOCKER_LOCK(lt, region, lp->holder, ndx);
+ if ((ret = __lock_getlocker(lt,
+ lp->holder, ndx, 0, &lockerp)) != 0)
+ continue;
+ if (lockerp->dd_id == DD_INVALID_ID)
+ dd = ((DB_LOCKER *)
+ R_ADDR(&lt->reginfo,
+ lockerp->master_locker))->dd_id;
+ else
+ dd = lockerp->dd_id;
+ id_array[dd].valid = 1;
+
+ /*
+ * If the transaction is pending abort, then
+ * ignore it on this iteration.
+ */
+ if (lp->status != DB_LSTAT_WAITING)
+ continue;
+
+ entryp = bitmap + (nentries * dd);
+ OR_MAP(entryp, tmpmap, nentries);
+ /*
+ * If this is the first waiter on the queue,
+ * then we remove the waitsfor relationship
+ * with itself. However, if it's anywhere
+ * else on the queue, then we have to keep
+ * it and we have an automatic deadlock.
+ */
+ if (is_first)
+ CLR_MAP(entryp, dd);
+ }
+ }
+
+ /* Now, for each locker, record its last lock. */
+ for (id = 0; id < count; id++) {
+ if (!id_array[id].valid)
+ continue;
+ LOCKER_LOCK(lt, region, id_array[id].id, ndx);
+ if ((ret = __lock_getlocker(lt,
+ id_array[id].id, ndx, 0, &lockerp)) != 0) {
+ __db_err(dbenv,
+ "No locks for locker %lu", (u_long)id_array[id].id);
+ continue;
+ }
+
+ /*
+ * If this is a master transaction, try to
+ * find one of its children's locks first,
+ * as they are probably more recent.
+ */
+ child = SH_LIST_FIRST(&lockerp->child_locker, __db_locker);
+ if (child != NULL) {
+ do {
+ lp = SH_LIST_FIRST(&child->heldby, __db_lock);
+ if (lp != NULL &&
+ lp->status == DB_LSTAT_WAITING) {
+ id_array[id].last_locker_id = child->id;
+ goto get_lock;
+ }
+ child = SH_LIST_NEXT(
+ child, child_link, __db_locker);
+ } while (child != NULL);
+ }
+ lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
+ if (lp != NULL) {
+ id_array[id].last_locker_id = lockerp->id;
+ get_lock: id_array[id].last_lock = R_OFFSET(&lt->reginfo, lp);
+ lo = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
+ pptr = SH_DBT_PTR(&lo->lockobj);
+ if (lo->lockobj.size >= sizeof(db_pgno_t))
+ memcpy(&id_array[id].pgno,
+ pptr, sizeof(db_pgno_t));
+ else
+ id_array[id].pgno = 0;
+ }
+ }
+
+ /* Pass complete, reset the deadlock detector bit. */
+ region->need_dd = 0;
+
+ /*
+ * Now we can release everything except the bitmap matrix that we
+ * created.
+ */
+ *nlockers = id;
+ *idmap = id_array;
+ *bmp = bitmap;
+ __os_free(tmpmap, sizeof(u_int32_t) * nentries);
+ return (0);
+}
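+
+/*
+ * A small worked example of what __dd_build produces (illustrative
+ * only, not part of the original source): assume three lockers with
+ * dd ids 0-2, where locker 0 waits on an object held by locker 1 and
+ * locker 1 waits on an object held by locker 2. With nentries == 1,
+ * the waitsfor matrix is three one-word rows:
+ *
+ *	row 0: {1}	(locker 0 waits for locker 1)
+ *	row 1: {2}	(locker 1 waits for locker 2)
+ *	row 2: {}	(locker 2 waits for no one)
+ *
+ * No cycle exists, so __dd_find below reports no deadlocks. If
+ * locker 2 then blocked on an object held by locker 0, row 2 would
+ * become {0} and the three lockers would form a cycle.
+ */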
+
+static int
+__dd_find(dbenv, bmp, idmap, nlockers, deadp)
+ DB_ENV *dbenv;
+ u_int32_t *bmp, nlockers;
+ locker_info *idmap;
+ u_int32_t ***deadp;
+{
+ u_int32_t i, j, k, nentries, *mymap, *tmpmap;
+ u_int32_t **retp;
+ int ndead, ndeadalloc, ret;
+
+#undef INITIAL_DEAD_ALLOC
+#define INITIAL_DEAD_ALLOC 8
+
+ ndeadalloc = INITIAL_DEAD_ALLOC;
+ ndead = 0;
+ if ((ret = __os_malloc(dbenv,
+ ndeadalloc * sizeof(u_int32_t *), NULL, &retp)) != 0)
+ return (ret);
+
+ /*
+ * For each locker, OR in the bits from the lockers on which that
+ * locker is waiting.
+ */
+ nentries = ALIGN(nlockers, 32) / 32;
+ for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nentries) {
+ if (!idmap[i].valid)
+ continue;
+ for (j = 0; j < nlockers; j++) {
+ if (!ISSET_MAP(mymap, j))
+ continue;
+
+ /* Find the map for this bit. */
+ tmpmap = bmp + (nentries * j);
+ OR_MAP(mymap, tmpmap, nentries);
+ if (!ISSET_MAP(mymap, i))
+ continue;
+
+ /* Make sure we leave room for NULL. */
+ if (ndead + 2 >= ndeadalloc) {
+ ndeadalloc <<= 1;
+ /*
+ * If the alloc fails, then simply return the
+ * deadlocks that we already have.
+ */
+ if (__os_realloc(dbenv,
+ ndeadalloc * sizeof(u_int32_t *),
+ NULL, &retp) != 0) {
+ retp[ndead] = NULL;
+ *deadp = retp;
+ return (0);
+ }
+ }
+ retp[ndead++] = mymap;
+
+ /* Mark all participants in this deadlock invalid. */
+ for (k = 0; k < nlockers; k++)
+ if (ISSET_MAP(mymap, k))
+ idmap[k].valid = 0;
+ break;
+ }
+ }
+ retp[ndead] = NULL;
+ *deadp = retp;
+ return (0);
+}
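+
+/*
+ * Continuing the worked example above: __dd_find detects cycles by a
+ * partial transitive closure. For locker i's row it ORs in the row
+ * of every locker j that i waits on; if i's own bit ever appears in
+ * i's row, i transitively waits on itself, and every bit set in the
+ * row names a participant in the deadlock. With rows 0:{1}, 1:{2},
+ * 2:{0}, processing row 0 ORs in row 1 (giving {1,2}), then row 2
+ * (giving {0,1,2}); bit 0 is now set, so row 0 is recorded as a
+ * deadlock and all three lockers are marked invalid.
+ */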
+
+static int
+__dd_abort(dbenv, info)
+ DB_ENV *dbenv;
+ locker_info *info;
+{
+ struct __db_lock *lockp;
+ DB_LOCKER *lockerp;
+ DB_LOCKOBJ *sh_obj;
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ u_int32_t ndx;
+ int ret;
+
+ lt = dbenv->lk_handle;
+ region = lt->reginfo.primary;
+
+ LOCKREGION(dbenv, lt);
+ /* Find the locker's last lock. */
+ LOCKER_LOCK(lt, region, info->last_locker_id, ndx);
+ if ((ret = __lock_getlocker(lt,
+ info->last_locker_id, ndx, 0, &lockerp)) != 0 || lockerp == NULL) {
+ if (ret == 0)
+ ret = DB_ALREADY_ABORTED;
+ goto out;
+ }
+
+ lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
+
+ /*
+ * It's possible that this locker was already aborted. If that's
+ * the case, make sure that we remove it from the hash table.
+ */
+ if (lockp == NULL) {
+ if (LOCKER_FREEABLE(lockerp)) {
+ __lock_freelocker(lt, region, lockerp, ndx);
+ goto out;
+ }
+ /*
+ * There is nothing to abort and the locker cannot be
+ * freed yet; falling through would dereference a NULL
+ * lockp, so report the abort as already done.
+ */
+ ret = DB_ALREADY_ABORTED;
+ goto out;
+ } else if (R_OFFSET(&lt->reginfo, lockp) != info->last_lock ||
+ lockp->status != DB_LSTAT_WAITING) {
+ ret = DB_ALREADY_ABORTED;
+ goto out;
+ }
+
+ sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
+ SH_LIST_REMOVE(lockp, locker_links, __db_lock);
+
+ /*
+ * Abort the lock, take it off the object's waiters list, and
+ * wake up the waiting thread.
+ */
+ SHOBJECT_LOCK(lt, region, sh_obj, ndx);
+ lockp->status = DB_LSTAT_ABORTED;
+ SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
+
+ /*
+ * Either the waiters list is now empty, in which case we remove
+ * it from dd_objs, or it is not empty, in which case we need to
+ * do promotion.
+ */
+ if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL)
+ SH_TAILQ_REMOVE(&region->dd_objs,
+ sh_obj, dd_links, __db_lockobj);
+ else
+ ret = __lock_promote(lt, sh_obj, 0);
+ MUTEX_UNLOCK(dbenv, &lockp->mutex);
+
+ region->ndeadlocks++;
+ UNLOCKREGION(dbenv, lt);
+
+ return (0);
+
+out: UNLOCKREGION(dbenv, lt);
+ return (ret);
+}
+
+#ifdef DIAGNOSTIC
+static void
+__dd_debug(dbenv, idmap, bitmap, nlockers)
+ DB_ENV *dbenv;
+ locker_info *idmap;
+ u_int32_t *bitmap, nlockers;
+{
+ u_int32_t i, j, *mymap, nentries;
+ int ret;
+ char *msgbuf;
+
+ __db_err(dbenv, "Waitsfor array\nWaiter:\tWaiting on:");
+
+ /* Allocate space to print 10 bytes per item waited on. */
+#undef MSGBUF_LEN
+#define MSGBUF_LEN ((nlockers + 1) * 10 + 64)
+ if ((ret = __os_malloc(dbenv, MSGBUF_LEN, NULL, &msgbuf)) != 0)
+ return;
+
+ nentries = ALIGN(nlockers, 32) / 32;
+ for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) {
+ if (!idmap[i].valid)
+ continue;
+ sprintf(msgbuf, /* Waiter. */
+ "%lx/%lu:\t", (u_long)idmap[i].id, (u_long)idmap[i].pgno);
+ /*
+ * Append in place: passing msgbuf as both the destination
+ * and a %s source of sprintf is undefined behavior.
+ */
+ for (j = 0; j < nlockers; j++)
+ if (ISSET_MAP(mymap, j))
+ sprintf(msgbuf + strlen(msgbuf), " %lx",
+ (u_long)idmap[j].id);
+ (void)sprintf(msgbuf + strlen(msgbuf),
+ " %lu", (u_long)idmap[i].last_lock);
+ __db_err(dbenv, msgbuf);
+ }
+
+ __os_free(msgbuf, MSGBUF_LEN);
+}
+#endif
diff --git a/bdb/lock/lock_method.c b/bdb/lock/lock_method.c
new file mode 100644
index 00000000000..46ed9e5166f
--- /dev/null
+++ b/bdb/lock/lock_method.c
@@ -0,0 +1,148 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: lock_method.c,v 11.5 2000/12/21 19:16:42 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_shash.h"
+#include "lock.h"
+
+/*
+ * __lock_set_lk_conflicts
+ * Set the conflicts matrix.
+ *
+ * PUBLIC: int __lock_set_lk_conflicts __P((DB_ENV *, u_int8_t *, int));
+ */
+int
+__lock_set_lk_conflicts(dbenv, lk_conflicts, lk_modes)
+ DB_ENV *dbenv;
+ u_int8_t *lk_conflicts;
+ int lk_modes;
+{
+ int ret;
+
+ ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_conflicts");
+
+ if (dbenv->lk_conflicts != NULL) {
+ __os_free(dbenv->lk_conflicts,
+ dbenv->lk_modes * dbenv->lk_modes);
+ dbenv->lk_conflicts = NULL;
+ }
+ if ((ret = __os_malloc(dbenv,
+ lk_modes * lk_modes, NULL, &dbenv->lk_conflicts)) != 0)
+ return (ret);
+ memcpy(dbenv->lk_conflicts, lk_conflicts, lk_modes * lk_modes);
+ dbenv->lk_modes = lk_modes;
+
+ return (0);
+}
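+
+/*
+ * Application-side usage sketch (illustrative; the two-mode matrix
+ * below is invented, as modes and their layout are the caller's to
+ * define):
+ *
+ *	u_int8_t conflicts[] = {
+ *		0, 0,	(mode 0, no-lock: conflicts with nothing)
+ *		0, 1,	(mode 1, exclusive: conflicts with itself)
+ *	};
+ *	if ((ret = dbenv->set_lk_conflicts(dbenv, conflicts, 2)) != 0)
+ *		(handle the error; must precede DB_ENV->open)
+ */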
+
+/*
+ * __lock_set_lk_detect
+ * Set the automatic deadlock detection.
+ *
+ * PUBLIC: int __lock_set_lk_detect __P((DB_ENV *, u_int32_t));
+ */
+int
+__lock_set_lk_detect(dbenv, lk_detect)
+ DB_ENV *dbenv;
+ u_int32_t lk_detect;
+{
+ ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_detect");
+
+ switch (lk_detect) {
+ case DB_LOCK_DEFAULT:
+ case DB_LOCK_OLDEST:
+ case DB_LOCK_RANDOM:
+ case DB_LOCK_YOUNGEST:
+ break;
+ default:
+ return (EINVAL);
+ }
+ dbenv->lk_detect = lk_detect;
+ return (0);
+}
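+
+/*
+ * Application-side usage sketch: enable automatic detection with the
+ * youngest-transaction victim policy before opening the environment:
+ *
+ *	if ((ret = dbenv->set_lk_detect(dbenv, DB_LOCK_YOUNGEST)) != 0)
+ *		(handle the error)
+ */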
+
+/*
+ * __lock_set_lk_max
+ * Set the lock table size: for compatibility, this sets the
+ * maximum number of locks, lockers and objects alike.
+ *
+ * PUBLIC: int __lock_set_lk_max __P((DB_ENV *, u_int32_t));
+ */
+int
+__lock_set_lk_max(dbenv, lk_max)
+ DB_ENV *dbenv;
+ u_int32_t lk_max;
+{
+ ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_max");
+
+ dbenv->lk_max = lk_max;
+ dbenv->lk_max_objects = lk_max;
+ dbenv->lk_max_lockers = lk_max;
+ return (0);
+}
+
+/*
+ * __lock_set_lk_max_locks
+ * Set the maximum number of locks.
+ *
+ * PUBLIC: int __lock_set_lk_max_locks __P((DB_ENV *, u_int32_t));
+ */
+int
+__lock_set_lk_max_locks(dbenv, lk_max)
+ DB_ENV *dbenv;
+ u_int32_t lk_max;
+{
+ ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_max_locks");
+
+ dbenv->lk_max = lk_max;
+ return (0);
+}
+
+/*
+ * __lock_set_lk_max_lockers
+ * Set the maximum number of lockers.
+ *
+ * PUBLIC: int __lock_set_lk_max_lockers __P((DB_ENV *, u_int32_t));
+ */
+int
+__lock_set_lk_max_lockers(dbenv, lk_max)
+ DB_ENV *dbenv;
+ u_int32_t lk_max;
+{
+ ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_max_lockers");
+
+ dbenv->lk_max_lockers = lk_max;
+ return (0);
+}
+
+/*
+ * __lock_set_lk_max_objects
+ * Set the maximum number of lock objects.
+ *
+ * PUBLIC: int __lock_set_lk_max_objects __P((DB_ENV *, u_int32_t));
+ */
+int
+__lock_set_lk_max_objects(dbenv, lk_max)
+ DB_ENV *dbenv;
+ u_int32_t lk_max;
+{
+ ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_lk_max_objects");
+
+ dbenv->lk_max_objects = lk_max;
+ return (0);
+}
diff --git a/bdb/lock/lock_region.c b/bdb/lock/lock_region.c
new file mode 100644
index 00000000000..4bd4ee4b765
--- /dev/null
+++ b/bdb/lock/lock_region.c
@@ -0,0 +1,430 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: lock_region.c,v 11.41 2000/12/20 21:53:04 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "db_shash.h"
+#include "lock.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+static int __lock_init __P((DB_ENV *, DB_LOCKTAB *));
+static size_t
+ __lock_region_size __P((DB_ENV *));
+
+#ifdef MUTEX_SYSTEM_RESOURCES
+static size_t __lock_region_maint __P((DB_ENV *));
+#endif
+
+/*
+ * This conflict array is used for concurrent db access (CDB). It
+ * uses the same locks as the db_rw_conflict array, but adds an IW
+ * mode to be used for write cursors.
+ */
+#define DB_LOCK_CDB_N 5
+static u_int8_t const db_cdb_conflicts[] = {
+ /* N R W WT IW*/
+ /* N */ 0, 0, 0, 0, 0,
+ /* R */ 0, 0, 1, 0, 0,
+ /* W */ 0, 1, 1, 1, 1,
+ /* WT */ 0, 0, 0, 0, 0,
+ /* IW */ 0, 0, 1, 0, 1,
+};
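+
+/*
+ * Sketch of how a conflict matrix is consulted (one plausible
+ * reading; the authoritative macro lives in the lock subsystem's
+ * headers): a request for mode `wanted' against a lock held in mode
+ * `held' conflicts iff
+ *
+ *	conflicts[held * nmodes + wanted] != 0
+ *
+ * Under that reading of db_cdb_conflicts above, a W request blocks
+ * behind a held R (row R, column W is 1), while an IW request does
+ * not (row R, column IW is 0).
+ */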
+
+/*
+ * __lock_dbenv_create --
+ * Lock specific creation of the DB_ENV structure.
+ *
+ * PUBLIC: void __lock_dbenv_create __P((DB_ENV *));
+ */
+void
+__lock_dbenv_create(dbenv)
+ DB_ENV *dbenv;
+{
+ dbenv->lk_max = DB_LOCK_DEFAULT_N;
+ dbenv->lk_max_lockers = DB_LOCK_DEFAULT_N;
+ dbenv->lk_max_objects = DB_LOCK_DEFAULT_N;
+
+ dbenv->set_lk_conflicts = __lock_set_lk_conflicts;
+ dbenv->set_lk_detect = __lock_set_lk_detect;
+ dbenv->set_lk_max = __lock_set_lk_max;
+ dbenv->set_lk_max_locks = __lock_set_lk_max_locks;
+ dbenv->set_lk_max_lockers = __lock_set_lk_max_lockers;
+ dbenv->set_lk_max_objects = __lock_set_lk_max_objects;
+
+#ifdef HAVE_RPC
+ /*
+ * If we have a client, overwrite what we just set up to point
+ * to the client functions.
+ */
+ if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) {
+ dbenv->set_lk_conflicts = __dbcl_set_lk_conflict;
+ dbenv->set_lk_detect = __dbcl_set_lk_detect;
+ dbenv->set_lk_max = __dbcl_set_lk_max;
+ dbenv->set_lk_max_locks = __dbcl_set_lk_max_locks;
+ dbenv->set_lk_max_lockers = __dbcl_set_lk_max_lockers;
+ dbenv->set_lk_max_objects = __dbcl_set_lk_max_objects;
+ }
+#endif
+}
+
+/*
+ * __lock_dbenv_close --
+ * Lock specific destruction of the DB_ENV structure.
+ *
+ * PUBLIC: void __lock_dbenv_close __P((DB_ENV *));
+ */
+void
+__lock_dbenv_close(dbenv)
+ DB_ENV *dbenv;
+{
+ if (!F_ISSET(dbenv, DB_ENV_USER_ALLOC) && dbenv->lk_conflicts != NULL) {
+ __os_free(dbenv->lk_conflicts,
+ dbenv->lk_modes * dbenv->lk_modes);
+ dbenv->lk_conflicts = NULL;
+ }
+}
+
+/*
+ * __lock_open --
+ * Internal version of lock_open: only called from DB_ENV->open.
+ *
+ * PUBLIC: int __lock_open __P((DB_ENV *));
+ */
+int
+__lock_open(dbenv)
+ DB_ENV *dbenv;
+{
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ size_t size;
+ int ret;
+
+ /* Create the lock table structure. */
+ if ((ret = __os_calloc(dbenv, 1, sizeof(DB_LOCKTAB), &lt)) != 0)
+ return (ret);
+ lt->dbenv = dbenv;
+
+ /* Join/create the lock region. */
+ lt->reginfo.type = REGION_TYPE_LOCK;
+ lt->reginfo.id = INVALID_REGION_ID;
+ lt->reginfo.mode = dbenv->db_mode;
+ lt->reginfo.flags = REGION_JOIN_OK;
+ if (F_ISSET(dbenv, DB_ENV_CREATE))
+ F_SET(&lt->reginfo, REGION_CREATE_OK);
+ size = __lock_region_size(dbenv);
+ if ((ret = __db_r_attach(dbenv, &lt->reginfo, size)) != 0)
+ goto err;
+
+ /* If we created the region, initialize it. */
+ if (F_ISSET(&lt->reginfo, REGION_CREATE))
+ if ((ret = __lock_init(dbenv, lt)) != 0)
+ goto err;
+
+ /* Set the local addresses. */
+ region = lt->reginfo.primary =
+ R_ADDR(&lt->reginfo, lt->reginfo.rp->primary);
+
+ /* Check for incompatible automatic deadlock detection requests. */
+ if (dbenv->lk_detect != DB_LOCK_NORUN) {
+ if (region->detect != DB_LOCK_NORUN &&
+ dbenv->lk_detect != DB_LOCK_DEFAULT &&
+ region->detect != dbenv->lk_detect) {
+ __db_err(dbenv,
+ "lock_open: incompatible deadlock detector mode");
+ ret = EINVAL;
+ goto err;
+ }
+
+ /*
+ * Upgrade if our caller wants automatic detection and it is
+ * not currently being done, whether or not we created the
+ * region.
+ */
+ if (region->detect == DB_LOCK_NORUN)
+ region->detect = dbenv->lk_detect;
+ }
+
+ /* Set remaining pointers into region. */
+ lt->conflicts = (u_int8_t *)R_ADDR(&lt->reginfo, region->conf_off);
+ lt->obj_tab = (DB_HASHTAB *)R_ADDR(&lt->reginfo, region->obj_off);
+ lt->locker_tab = (DB_HASHTAB *)R_ADDR(&lt->reginfo, region->locker_off);
+
+ R_UNLOCK(dbenv, &lt->reginfo);
+
+ dbenv->lk_handle = lt;
+ return (0);
+
+err: if (lt->reginfo.addr != NULL) {
+ if (F_ISSET(&lt->reginfo, REGION_CREATE))
+ ret = __db_panic(dbenv, ret);
+ R_UNLOCK(dbenv, &lt->reginfo);
+ (void)__db_r_detach(dbenv, &lt->reginfo, 0);
+ }
+ __os_free(lt, sizeof(*lt));
+ return (ret);
+}
+
+/*
+ * __lock_init --
+ * Initialize the lock region.
+ */
+static int
+__lock_init(dbenv, lt)
+ DB_ENV *dbenv;
+ DB_LOCKTAB *lt;
+{
+ const u_int8_t *lk_conflicts;
+ struct __db_lock *lp;
+ DB_LOCKER *lidp;
+ DB_LOCKOBJ *op;
+ DB_LOCKREGION *region;
+#ifdef MUTEX_SYSTEM_RESOURCES
+ size_t maint_size;
+#endif
+ u_int32_t i, lk_modes;
+ u_int8_t *addr;
+ int ret;
+
+ if ((ret = __db_shalloc(lt->reginfo.addr,
+ sizeof(DB_LOCKREGION), 0, &lt->reginfo.primary)) != 0)
+ goto mem_err;
+ lt->reginfo.rp->primary = R_OFFSET(&lt->reginfo, lt->reginfo.primary);
+ region = lt->reginfo.primary;
+ memset(region, 0, sizeof(*region));
+
+ /* Select a conflict matrix if none specified. */
+ if (dbenv->lk_modes == 0)
+ if (CDB_LOCKING(dbenv)) {
+ lk_modes = DB_LOCK_CDB_N;
+ lk_conflicts = db_cdb_conflicts;
+ } else {
+ lk_modes = DB_LOCK_RIW_N;
+ lk_conflicts = db_riw_conflicts;
+ }
+ else {
+ lk_modes = dbenv->lk_modes;
+ lk_conflicts = dbenv->lk_conflicts;
+ }
+
+ region->id = 0;
+ region->need_dd = 0;
+ region->detect = DB_LOCK_NORUN;
+ region->maxlocks = dbenv->lk_max;
+ region->maxlockers = dbenv->lk_max_lockers;
+ region->maxobjects = dbenv->lk_max_objects;
+ region->locker_t_size = __db_tablesize(dbenv->lk_max_lockers);
+ region->object_t_size = __db_tablesize(dbenv->lk_max_objects);
+ region->nmodes = lk_modes;
+ region->nlocks = 0;
+ region->maxnlocks = 0;
+ region->nlockers = 0;
+ region->maxnlockers = 0;
+ region->nobjects = 0;
+ region->maxnobjects = 0;
+ region->nconflicts = 0;
+ region->nrequests = 0;
+ region->nreleases = 0;
+ region->ndeadlocks = 0;
+
+ /* Allocate room for the conflict matrix and initialize it. */
+ if ((ret =
+ __db_shalloc(lt->reginfo.addr, lk_modes * lk_modes, 0, &addr)) != 0)
+ goto mem_err;
+ memcpy(addr, lk_conflicts, lk_modes * lk_modes);
+ region->conf_off = R_OFFSET(&lt->reginfo, addr);
+
+ /* Allocate room for the object hash table and initialize it. */
+ if ((ret = __db_shalloc(lt->reginfo.addr,
+ region->object_t_size * sizeof(DB_HASHTAB), 0, &addr)) != 0)
+ goto mem_err;
+ __db_hashinit(addr, region->object_t_size);
+ region->obj_off = R_OFFSET(&lt->reginfo, addr);
+
+ /* Allocate room for the locker hash table and initialize it. */
+ if ((ret = __db_shalloc(lt->reginfo.addr,
+ region->locker_t_size * sizeof(DB_HASHTAB), 0, &addr)) != 0)
+ goto mem_err;
+ __db_hashinit(addr, region->locker_t_size);
+ region->locker_off = R_OFFSET(&lt->reginfo, addr);
+
+#ifdef MUTEX_SYSTEM_RESOURCES
+ maint_size = __lock_region_maint(dbenv);
+ /* Allocate room for the locker maintenance info and initialize it. */
+ if ((ret = __db_shalloc(lt->reginfo.addr,
+ sizeof(REGMAINT) + maint_size, 0, &addr)) != 0)
+ goto mem_err;
+ __db_maintinit(&lt->reginfo, addr, maint_size);
+ region->maint_off = R_OFFSET(&lt->reginfo, addr);
+#endif
+
+ /*
+ * Initialize locks onto a free list. Initialize and lock the mutex
+ * so that when we need to block, all we need do is try to acquire
+ * the mutex.
+ */
+ SH_TAILQ_INIT(&region->free_locks);
+ for (i = 0; i < region->maxlocks; ++i) {
+ if ((ret = __db_shalloc(lt->reginfo.addr,
+ sizeof(struct __db_lock), MUTEX_ALIGN, &lp)) != 0)
+ goto mem_err;
+ lp->status = DB_LSTAT_FREE;
+ if ((ret = __db_shmutex_init(dbenv, &lp->mutex,
+ R_OFFSET(&lt->reginfo, &lp->mutex) + DB_FCNTL_OFF_LOCK,
+ MUTEX_SELF_BLOCK, &lt->reginfo,
+ (REGMAINT *)R_ADDR(&lt->reginfo, region->maint_off))) != 0)
+ return (ret);
+ MUTEX_LOCK(dbenv, &lp->mutex, lt->dbenv->lockfhp);
+ SH_TAILQ_INSERT_HEAD(&region->free_locks, lp, links, __db_lock);
+ }
+
+ /* Initialize objects onto a free list. */
+ SH_TAILQ_INIT(&region->dd_objs);
+ SH_TAILQ_INIT(&region->free_objs);
+ for (i = 0; i < region->maxobjects; ++i) {
+ if ((ret = __db_shalloc(lt->reginfo.addr,
+ sizeof(DB_LOCKOBJ), 0, &op)) != 0)
+ goto mem_err;
+ SH_TAILQ_INSERT_HEAD(
+ &region->free_objs, op, links, __db_lockobj);
+ }
+
+ /* Initialize lockers onto a free list. */
+ SH_TAILQ_INIT(&region->free_lockers);
+ for (i = 0; i < region->maxlockers; ++i) {
+ if ((ret = __db_shalloc(lt->reginfo.addr,
+ sizeof(DB_LOCKER), 0, &lidp)) != 0) {
+mem_err: __db_err(dbenv, "Unable to allocate memory for the lock table");
+ return (ret);
+ }
+ SH_TAILQ_INSERT_HEAD(
+ &region->free_lockers, lidp, links, __db_locker);
+ }
+
+ return (0);
+}
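+
+/*
+ * A note on the self-blocking mutexes initialized above: each free
+ * lock's mutex is created with MUTEX_SELF_BLOCK and then immediately
+ * locked. When a lock request must later wait, the waiting thread
+ * simply tries to acquire the mutex of its own lock structure and
+ * blocks; whoever grants (or aborts) the lock unlocks that mutex and
+ * releases exactly one waiter. Sketch of the two sides:
+ *
+ *	MUTEX_LOCK(dbenv, &lp->mutex, dbenv->lockfhp);	(waiter blocks)
+ *	...
+ *	MUTEX_UNLOCK(dbenv, &lp->mutex);	(grantor wakes the waiter)
+ */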
+
+/*
+ * __lock_close --
+ * Internal version of lock_close: only called from db_appinit.
+ *
+ * PUBLIC: int __lock_close __P((DB_ENV *));
+ */
+int
+__lock_close(dbenv)
+ DB_ENV *dbenv;
+{
+ DB_LOCKTAB *lt;
+ int ret;
+
+ lt = dbenv->lk_handle;
+
+ /* Detach from the region. */
+ ret = __db_r_detach(dbenv, &lt->reginfo, 0);
+
+ __os_free(lt, sizeof(*lt));
+
+ dbenv->lk_handle = NULL;
+ return (ret);
+}
+
+/*
+ * __lock_region_size --
+ * Return the region size.
+ */
+static size_t
+__lock_region_size(dbenv)
+ DB_ENV *dbenv;
+{
+ size_t retval;
+
+ /*
+ * Figure out how much space we're going to need. This list should
+ * map one-to-one with the __db_shalloc calls in __lock_init.
+ */
+ retval = 0;
+ retval += __db_shalloc_size(sizeof(DB_LOCKREGION), 1);
+ retval += __db_shalloc_size(dbenv->lk_modes * dbenv->lk_modes, 1);
+ retval += __db_shalloc_size(
+ __db_tablesize(dbenv->lk_max_lockers) * (sizeof(DB_HASHTAB)), 1);
+ retval += __db_shalloc_size(
+ __db_tablesize(dbenv->lk_max_objects) * (sizeof(DB_HASHTAB)), 1);
+#ifdef MUTEX_SYSTEM_RESOURCES
+ retval +=
+ __db_shalloc_size(sizeof(REGMAINT) + __lock_region_maint(dbenv), 1);
+#endif
+ retval += __db_shalloc_size(
+ sizeof(struct __db_lock), MUTEX_ALIGN) * dbenv->lk_max;
+ retval += __db_shalloc_size(sizeof(DB_LOCKOBJ), 1) * dbenv->lk_max_objects;
+ retval += __db_shalloc_size(sizeof(DB_LOCKER), 1) * dbenv->lk_max_lockers;
+
+ /*
+ * Include 16 bytes of string space per lock. DB doesn't use it
+ * because we pre-allocate lock space for DBTs in the structure.
+ */
+ retval += __db_shalloc_size(dbenv->lk_max * 16, sizeof(size_t));
+
+ /* And we keep getting this wrong; let's be generous. */
+ retval += retval / 4;
+
+ return (retval);
+}
+
+#ifdef MUTEX_SYSTEM_RESOURCES
+/*
+ * __lock_region_maint --
+ * Return the amount of space needed for region maintenance info.
+ */
+static size_t
+__lock_region_maint(dbenv)
+ DB_ENV *dbenv;
+{
+ size_t s;
+
+ s = sizeof(MUTEX *) * dbenv->lk_max;
+ return (s);
+}
+#endif
+
+/*
+ * __lock_region_destroy
+ * Destroy any region maintenance info.
+ *
+ * PUBLIC: void __lock_region_destroy __P((DB_ENV *, REGINFO *));
+ */
+void
+__lock_region_destroy(dbenv, infop)
+ DB_ENV *dbenv;
+ REGINFO *infop;
+{
+ DB_LOCKREGION *region;
+
+ COMPQUIET(dbenv, NULL);
+ region = R_ADDR(infop, infop->rp->primary);
+
+ __db_shlocks_destroy(infop,
+ (REGMAINT *)R_ADDR(infop, region->maint_off));
+ return;
+}
diff --git a/bdb/lock/lock_stat.c b/bdb/lock/lock_stat.c
new file mode 100644
index 00000000000..ed5b60d0d7a
--- /dev/null
+++ b/bdb/lock/lock_stat.c
@@ -0,0 +1,308 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: lock_stat.c,v 11.4 2000/12/08 20:15:31 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <ctype.h>
+#endif
+
+#ifdef HAVE_RPC
+#include "db_server.h"
+#endif
+
+#include "db_int.h"
+#include "db_shash.h"
+#include "lock.h"
+
+#ifdef HAVE_RPC
+#include "gen_client_ext.h"
+#include "rpc_client_ext.h"
+#endif
+
+static void __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKER *, FILE *));
+static void __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *, FILE *));
+static const char *
+ __lock_dump_status __P((db_status_t));
+
+/*
+ * lock_stat --
+ * Return LOCK statistics.
+ */
+int
+lock_stat(dbenv, statp, db_malloc)
+ DB_ENV *dbenv;
+ DB_LOCK_STAT **statp;
+ void *(*db_malloc) __P((size_t));
+{
+ DB_LOCKREGION *region;
+ DB_LOCKTAB *lt;
+ DB_LOCK_STAT *stats;
+ int ret;
+
+#ifdef HAVE_RPC
+ if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
+ return (__dbcl_lock_stat(dbenv, statp, db_malloc));
+#endif
+
+ PANIC_CHECK(dbenv);
+ ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK);
+
+ *statp = NULL;
+
+ lt = dbenv->lk_handle;
+
+ if ((ret = __os_malloc(dbenv, sizeof(*stats), db_malloc, &stats)) != 0)
+ return (ret);
+
+ /* Copy out the global statistics. */
+ R_LOCK(dbenv, &lt->reginfo);
+
+ region = lt->reginfo.primary;
+ stats->st_lastid = region->id;
+ stats->st_maxlocks = region->maxlocks;
+ stats->st_maxlockers = region->maxlockers;
+ stats->st_maxobjects = region->maxobjects;
+ stats->st_nmodes = region->nmodes;
+ stats->st_nlockers = region->nlockers;
+ stats->st_maxnlockers = region->maxnlockers;
+ stats->st_nobjects = region->nobjects;
+ stats->st_maxnobjects = region->maxnobjects;
+ stats->st_nlocks = region->nlocks;
+ stats->st_maxnlocks = region->maxnlocks;
+ stats->st_nconflicts = region->nconflicts;
+ stats->st_nrequests = region->nrequests;
+ stats->st_nreleases = region->nreleases;
+ stats->st_nnowaits = region->nnowaits;
+ stats->st_ndeadlocks = region->ndeadlocks;
+
+ stats->st_region_wait = lt->reginfo.rp->mutex.mutex_set_wait;
+ stats->st_region_nowait = lt->reginfo.rp->mutex.mutex_set_nowait;
+ stats->st_regsize = lt->reginfo.rp->size;
+
+ R_UNLOCK(dbenv, &lt->reginfo);
+
+ *statp = stats;
+ return (0);
+}
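+
+/*
+ * Application-side usage sketch (illustrative; with a NULL db_malloc
+ * the statistics buffer comes from the library's default allocator):
+ *
+ *	DB_LOCK_STAT *sp;
+ *	if ((ret = lock_stat(dbenv, &sp, NULL)) == 0) {
+ *		printf("locks: %lu (max %lu), deadlocks: %lu\n",
+ *		    (u_long)sp->st_nlocks, (u_long)sp->st_maxnlocks,
+ *		    (u_long)sp->st_ndeadlocks);
+ *		free(sp);
+ *	}
+ */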
+
+#define LOCK_DUMP_CONF 0x001 /* Conflict matrix. */
+#define LOCK_DUMP_FREE 0x002 /* Display lock free list. */
+#define LOCK_DUMP_LOCKERS 0x004 /* Display lockers. */
+#define LOCK_DUMP_MEM 0x008 /* Display region memory. */
+#define LOCK_DUMP_OBJECTS 0x010 /* Display objects. */
+#define LOCK_DUMP_ALL 0x01f /* Display all. */
+
+/*
+ * __lock_dump_region --
+ *
+ * PUBLIC: void __lock_dump_region __P((DB_ENV *, char *, FILE *));
+ */
+void
+__lock_dump_region(dbenv, area, fp)
+ DB_ENV *dbenv;
+ char *area;
+ FILE *fp;
+{
+ struct __db_lock *lp;
+ DB_LOCKER *lip;
+ DB_LOCKOBJ *op;
+ DB_LOCKREGION *lrp;
+ DB_LOCKTAB *lt;
+ u_int32_t flags, i, j;
+ int label;
+
+ /* Make it easy to call from the debugger. */
+ if (fp == NULL)
+ fp = stderr;
+
+ for (flags = 0; *area != '\0'; ++area)
+ switch (*area) {
+ case 'A':
+ LF_SET(LOCK_DUMP_ALL);
+ break;
+ case 'c':
+ LF_SET(LOCK_DUMP_CONF);
+ break;
+ case 'f':
+ LF_SET(LOCK_DUMP_FREE);
+ break;
+ case 'l':
+ LF_SET(LOCK_DUMP_LOCKERS);
+ break;
+ case 'm':
+ LF_SET(LOCK_DUMP_MEM);
+ break;
+ case 'o':
+ LF_SET(LOCK_DUMP_OBJECTS);
+ break;
+ }
+
+ lt = dbenv->lk_handle;
+ lrp = lt->reginfo.primary;
+ LOCKREGION(dbenv, lt);
+
+ fprintf(fp, "%s\nLock region parameters\n", DB_LINE);
+ fprintf(fp, "%s: %lu, %s: %lu, %s: %lu, %s: %lu, %s: %lu, %s: %lu, %s: %lu\n",
+ "locker table size", (u_long)lrp->locker_t_size,
+ "object table size", (u_long)lrp->object_t_size,
+ "obj_off", (u_long)lrp->obj_off,
+ "osynch_off", (u_long)lrp->osynch_off,
+ "locker_off", (u_long)lrp->locker_off,
+ "lsynch_off", (u_long)lrp->lsynch_off,
+ "need_dd", (u_long)lrp->need_dd);
+
+ if (LF_ISSET(LOCK_DUMP_CONF)) {
+ fprintf(fp, "\n%s\nConflict matrix\n", DB_LINE);
+ for (i = 0; i < lrp->nmodes; i++) {
+ for (j = 0; j < lrp->nmodes; j++)
+ fprintf(fp, "%lu\t",
+ (u_long)lt->conflicts[i * lrp->nmodes + j]);
+ fprintf(fp, "\n");
+ }
+ }
+
+ if (LF_ISSET(LOCK_DUMP_LOCKERS)) {
+ fprintf(fp, "%s\nLocker hash buckets\n", DB_LINE);
+ for (i = 0; i < lrp->locker_t_size; i++) {
+ label = 1;
+ for (lip =
+ SH_TAILQ_FIRST(&lt->locker_tab[i], __db_locker);
+ lip != NULL;
+ lip = SH_TAILQ_NEXT(lip, links, __db_locker)) {
+ if (label) {
+ fprintf(fp, "Bucket %lu:\n", (u_long)i);
+ label = 0;
+ }
+ __lock_dump_locker(lt, lip, fp);
+ }
+ }
+ }
+
+ if (LF_ISSET(LOCK_DUMP_OBJECTS)) {
+ fprintf(fp, "%s\nObject hash buckets\n", DB_LINE);
+ for (i = 0; i < lrp->object_t_size; i++) {
+ label = 1;
+ for (op = SH_TAILQ_FIRST(&lt->obj_tab[i], __db_lockobj);
+ op != NULL;
+ op = SH_TAILQ_NEXT(op, links, __db_lockobj)) {
+ if (label) {
+ fprintf(fp, "Bucket %lu:\n", (u_long)i);
+ label = 0;
+ }
+ __lock_dump_object(lt, op, fp);
+ }
+ }
+ }
+
+ if (LF_ISSET(LOCK_DUMP_FREE)) {
+ fprintf(fp, "%s\nLock free list\n", DB_LINE);
+ for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
+ lp != NULL;
+ lp = SH_TAILQ_NEXT(lp, links, __db_lock))
+ fprintf(fp, "0x%lx: %lu\t%lu\t%s\t0x%lx\n", (u_long)lp,
+ (u_long)lp->holder, (u_long)lp->mode,
+ __lock_dump_status(lp->status), (u_long)lp->obj);
+
+ fprintf(fp, "%s\nObject free list\n", DB_LINE);
+ for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
+ op != NULL;
+ op = SH_TAILQ_NEXT(op, links, __db_lockobj))
+ fprintf(fp, "0x%lx\n", (u_long)op);
+
+ fprintf(fp, "%s\nLocker free list\n", DB_LINE);
+ for (lip = SH_TAILQ_FIRST(&lrp->free_lockers, __db_locker);
+ lip != NULL;
+ lip = SH_TAILQ_NEXT(lip, links, __db_locker))
+ fprintf(fp, "0x%lx\n", (u_long)lip);
+ }
+
+ if (LF_ISSET(LOCK_DUMP_MEM))
+ __db_shalloc_dump(lt->reginfo.addr, fp);
+
+ UNLOCKREGION(dbenv, lt);
+}
+
+static void
+__lock_dump_locker(lt, lip, fp)
+ DB_LOCKTAB *lt;
+ DB_LOCKER *lip;
+ FILE *fp;
+{
+ struct __db_lock *lp;
+
+ fprintf(fp, "L %lx [%ld]", (u_long)lip->id, (long)lip->dd_id);
+ fprintf(fp, " %s ", F_ISSET(lip, DB_LOCKER_DELETED) ? "(D)" : " ");
+
+ if ((lp = SH_LIST_FIRST(&lip->heldby, __db_lock)) == NULL)
+ fprintf(fp, "\n");
+ else
+ for (; lp != NULL;
+ lp = SH_LIST_NEXT(lp, locker_links, __db_lock))
+ __lock_printlock(lt, lp, 1);
+}
+
+static void
+__lock_dump_object(lt, op, fp)
+ DB_LOCKTAB *lt;
+ DB_LOCKOBJ *op;
+ FILE *fp;
+{
+ struct __db_lock *lp;
+ u_int32_t j;
+ u_int8_t *ptr;
+ u_int ch;
+
+ ptr = SH_DBT_PTR(&op->lockobj);
+ for (j = 0; j < op->lockobj.size; ptr++, j++) {
+ ch = *ptr;
+ fprintf(fp, isprint(ch) ? "%c" : "\\%o", ch);
+ }
+ fprintf(fp, "\n");
+
+ fprintf(fp, "H:");
+ for (lp =
+ SH_TAILQ_FIRST(&op->holders, __db_lock);
+ lp != NULL;
+ lp = SH_TAILQ_NEXT(lp, links, __db_lock))
+ __lock_printlock(lt, lp, 1);
+ lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
+ if (lp != NULL) {
+ fprintf(fp, "\nW:");
+ for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock))
+ __lock_printlock(lt, lp, 1);
+ }
+}
+
+static const char *
+__lock_dump_status(status)
+ db_status_t status;
+{
+ switch (status) {
+ case DB_LSTAT_ABORTED:
+ return ("aborted");
+ case DB_LSTAT_ERR:
+ return ("err");
+ case DB_LSTAT_FREE:
+ return ("free");
+ case DB_LSTAT_HELD:
+ return ("held");
+ case DB_LSTAT_NOGRANT:
+ return ("nogrant");
+ case DB_LSTAT_PENDING:
+ return ("pending");
+ case DB_LSTAT_WAITING:
+ return ("waiting");
+ }
+ return ("unknown status");
+}
diff --git a/bdb/lock/lock_util.c b/bdb/lock/lock_util.c
new file mode 100644
index 00000000000..fd5c6ad90cb
--- /dev/null
+++ b/bdb/lock/lock_util.c
@@ -0,0 +1,138 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: lock_util.c,v 11.5 2000/07/04 18:28:24 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "hash.h"
+#include "lock.h"
+
+/*
+ * __lock_cmp --
+ * This function is used to compare a DBT that is about to be entered
+ * into a hash table with an object already in the hash table. Note
+ * that it just returns 1 (true) on equal and 0 on not-equal. Therefore
+ * this function cannot be used as a sort function; its purpose is to
+ * be used as a hash comparison function.
+ *
+ * PUBLIC: int __lock_cmp __P((const DBT *, DB_LOCKOBJ *));
+ */
+int
+__lock_cmp(dbt, lock_obj)
+ const DBT *dbt;
+ DB_LOCKOBJ *lock_obj;
+{
+ void *obj_data;
+
+ obj_data = SH_DBT_PTR(&lock_obj->lockobj);
+ return (dbt->size == lock_obj->lockobj.size &&
+ memcmp(dbt->data, obj_data, dbt->size) == 0);
+}
+
+/*
+ * PUBLIC: int __lock_locker_cmp __P((u_int32_t, DB_LOCKER *));
+ */
+int
+__lock_locker_cmp(locker, sh_locker)
+ u_int32_t locker;
+ DB_LOCKER *sh_locker;
+{
+ return (locker == sh_locker->id);
+}
+
+/*
+ * The next two functions are the hash functions used to store objects in the
+ * lock hash tables. They are hashing the same items, but one (__lock_ohash)
+ * takes a DBT (used for hashing a parameter passed from the user) and the
+ * other (__lock_lhash) takes a DB_LOCKOBJ (used for hashing something that is
+ * already in the lock manager). In both cases, we have a special check to
+ * fast path the case where we think we are doing a hash on a DB page/fileid
+ * pair. If the size is right, then we do the fast hash.
+ *
+ * We know that DB uses DB_LOCK_ILOCK types for its lock objects. The first
+ * four bytes are the 4-byte page number and the next DB_FILE_ID_LEN bytes
+ * are a unique file id, where the first 4 bytes on UNIX systems are the file
+ * inode number, and the first 4 bytes on Windows systems are the FileIndexLow
+ * bytes. So, we use the XOR of the page number and the first four bytes of
+ * the file id to produce a 32-bit hash value.
+ *
+ * We have no particular reason to believe that this algorithm will produce
+ * a good hash, but we want a fast hash more than we want a good one, when
+ * we're coming through this code path.
+ */
+#define FAST_HASH(P) { \
+ u_int32_t __h; \
+ u_int8_t *__cp, *__hp; \
+ __hp = (u_int8_t *)&__h; \
+ __cp = (u_int8_t *)(P); \
+ __hp[0] = __cp[0] ^ __cp[4]; \
+ __hp[1] = __cp[1] ^ __cp[5]; \
+ __hp[2] = __cp[2] ^ __cp[6]; \
+ __hp[3] = __cp[3] ^ __cp[7]; \
+ return (__h); \
+}
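+
+/*
+ * Worked example (illustrative): on a little-endian host, hashing a
+ * DB_LOCK_ILOCK whose page number is 2 and whose file id begins with
+ * the bytes de ad be ef gives
+ *
+ *	cp[0..3] = 02 00 00 00	(page number)
+ *	cp[4..7] = de ad be ef	(first four file-id bytes)
+ *	hash	 = dc ad be ef	(bytewise XOR)
+ *
+ * Different hosts may produce different values for the same object,
+ * which is fine: the hash need only be consistent within a single
+ * environment.
+ */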
+
+/*
+ * __lock_ohash --
+ *
+ * PUBLIC: u_int32_t __lock_ohash __P((const DBT *));
+ */
+u_int32_t
+__lock_ohash(dbt)
+ const DBT *dbt;
+{
+ if (dbt->size == sizeof(DB_LOCK_ILOCK))
+ FAST_HASH(dbt->data);
+
+ return (__ham_func5(NULL, dbt->data, dbt->size));
+}
+
+/*
+ * __lock_lhash --
+ *
+ * PUBLIC: u_int32_t __lock_lhash __P((DB_LOCKOBJ *));
+ */
+u_int32_t
+__lock_lhash(lock_obj)
+ DB_LOCKOBJ *lock_obj;
+{
+ void *obj_data;
+
+ obj_data = SH_DBT_PTR(&lock_obj->lockobj);
+
+ if (lock_obj->lockobj.size == sizeof(DB_LOCK_ILOCK))
+ FAST_HASH(obj_data);
+
+ return (__ham_func5(NULL, obj_data, lock_obj->lockobj.size));
+}
+
+/*
+ * __lock_locker_hash --
+ * Hash function for entering lockers into the locker hash table.
+ * Since these are simply 32-bit unsigned integers, just return
+ * the locker value.
+ *
+ * PUBLIC: u_int32_t __lock_locker_hash __P((u_int32_t));
+ */
+u_int32_t
+__lock_locker_hash(locker)
+ u_int32_t locker;
+{
+ return (locker);
+}