diff options
Diffstat (limited to 'bdb/lock/lock.c')
-rw-r--r-- | bdb/lock/lock.c | 1085 |
1 files changed, 760 insertions, 325 deletions
diff --git a/bdb/lock/lock.c b/bdb/lock/lock.c index 8d246f7ded3..8eda155b822 100644 --- a/bdb/lock/lock.c +++ b/bdb/lock/lock.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: lock.c,v 11.40 2000/12/19 23:18:58 ubell Exp $"; +static const char revid[] = "$Id: lock.c,v 11.108 2002/08/06 06:11:34 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -17,118 +17,183 @@ static const char revid[] = "$Id: lock.c,v 11.40 2000/12/19 23:18:58 ubell Exp $ #include <string.h> #endif -#ifdef HAVE_RPC -#include "db_server.h" -#endif - #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "lock.h" -#include "log.h" -#include "db_am.h" -#include "txn.h" - -#ifdef HAVE_RPC -#include "gen_client_ext.h" -#include "rpc_client_ext.h" -#endif +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" static int __lock_checklocker __P((DB_LOCKTAB *, - struct __db_lock *, u_int32_t, u_int32_t, int *)); -static int __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, - u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); -static int __lock_is_parent __P((DB_LOCKTAB *, u_int32_t, DB_LOCKER *)); -static int __lock_put_internal __P((DB_LOCKTAB *, - struct __db_lock *, u_int32_t, u_int32_t)); -static int __lock_put_nolock __P((DB_ENV *, DB_LOCK *, int *, int)); -static void __lock_remove_waiter __P((DB_ENV *, - DB_LOCKOBJ *, struct __db_lock *, db_status_t)); + struct __db_lock *, u_int32_t, u_int32_t)); +static void __lock_expires __P((DB_ENV *, db_timeval_t *, db_timeout_t)); +static void __lock_freelocker + __P((DB_LOCKTAB *, DB_LOCKREGION *, DB_LOCKER *, u_int32_t)); +static int __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, u_int32_t, + const DBT *, db_lockmode_t, db_timeout_t, DB_LOCK *)); 
+static int __lock_getobj + __P((DB_LOCKTAB *, const DBT *, u_int32_t, int, DB_LOCKOBJ **)); +static int __lock_is_parent __P((DB_LOCKTAB *, u_int32_t, DB_LOCKER *)); +static int __lock_put_internal __P((DB_LOCKTAB *, + struct __db_lock *, u_int32_t, u_int32_t)); +static int __lock_put_nolock __P((DB_ENV *, DB_LOCK *, int *, u_int32_t)); +static void __lock_remove_waiter __P((DB_LOCKTAB *, + DB_LOCKOBJ *, struct __db_lock *, db_status_t)); +static int __lock_trade __P((DB_ENV *, DB_LOCK *, u_int32_t)); static const char __db_lock_err[] = "Lock table is out of available %s"; static const char __db_lock_invalid[] = "%s: Lock is no longer valid"; static const char __db_locker_invalid[] = "Locker is not valid"; /* - * lock_id -- + * __lock_id -- * Generate a unique locker id. + * + * PUBLIC: int __lock_id __P((DB_ENV *, u_int32_t *)); */ int -lock_id(dbenv, idp) +__lock_id(dbenv, idp) DB_ENV *dbenv; u_int32_t *idp; { + DB_LOCKER *lk; DB_LOCKTAB *lt; DB_LOCKREGION *region; - -#ifdef HAVE_RPC - if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) - return (__dbcl_lock_id(dbenv, idp)); -#endif + u_int32_t *ids, locker_ndx; + int nids, ret; PANIC_CHECK(dbenv); - ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + ENV_REQUIRES_CONFIG(dbenv, + dbenv->lk_handle, "DB_ENV->lock_id", DB_INIT_LOCK); lt = dbenv->lk_handle; region = lt->reginfo.primary; + ret = 0; /* - * Note that we are letting locker IDs wrap. - * - * This is potentially dangerous in that it's conceivable that you - * could be allocating a new locker id and still have someone using - * it. However, the alternatives are that we keep a bitmap of - * locker ids or we forbid wrapping. Both are probably bad. The - * bitmap of locker ids will take up 64 MB of space. Forbidding - * wrapping means that we'll run out of locker IDs after 2 billion. - * In order for the wrap bug to fire, we'd need to have something - * that stayed open while 2 billion locker ids were used up. 
Since - * we cache cursors it means that something would have to stay open - * sufficiently long that we open and close a lot of files and a - * lot of cursors within them. Betting that this won't happen seems - * to the lesser of the evils. + * Allocate a new lock id. If we wrap around then we + * find the minimum currently in use and make sure we + * can stay below that. This code is similar to code + * in __txn_begin_int for recovering txn ids. */ LOCKREGION(dbenv, lt); - if (region->id >= DB_LOCK_MAXID) - region->id = 0; - *idp = ++region->id; - UNLOCKREGION(dbenv, lt); + /* + * Our current valid range can span the maximum valid value, so check + * for it and wrap manually. + */ + if (region->stat.st_id == DB_LOCK_MAXID && + region->stat.st_cur_maxid != DB_LOCK_MAXID) + region->stat.st_id = DB_LOCK_INVALIDID; + if (region->stat.st_id == region->stat.st_cur_maxid) { + if ((ret = __os_malloc(dbenv, + sizeof(u_int32_t) * region->stat.st_nlockers, &ids)) != 0) + goto err; + nids = 0; + for (lk = SH_TAILQ_FIRST(&region->lockers, __db_locker); + lk != NULL; + lk = SH_TAILQ_NEXT(lk, ulinks, __db_locker)) + ids[nids++] = lk->id; + region->stat.st_id = DB_LOCK_INVALIDID; + region->stat.st_cur_maxid = DB_LOCK_MAXID; + if (nids != 0) + __db_idspace(ids, nids, + &region->stat.st_id, &region->stat.st_cur_maxid); + __os_free(dbenv, ids); + } + *idp = ++region->stat.st_id; - return (0); + /* Allocate a locker for this id. */ + LOCKER_LOCK(lt, region, *idp, locker_ndx); + ret = __lock_getlocker(lt, *idp, locker_ndx, 1, &lk); + +err: UNLOCKREGION(dbenv, lt); + + return (ret); } /* - * Vector lock routine. This function takes a set of operations - * and performs them all at once. In addition, lock_vec provides - * functionality for lock inheritance, releasing all locks for a - * given locker (used during transaction commit/abort), releasing - * all locks on a given object, and generating debugging information. + * __lock_id_free -- + * Free a locker id. 
+ * + * PUBLIC: int __lock_id_free __P((DB_ENV *, u_int32_t)); */ int -lock_vec(dbenv, locker, flags, list, nlist, elistp) +__lock_id_free(dbenv, id) + DB_ENV *dbenv; + u_int32_t id; +{ + DB_LOCKER *sh_locker; + DB_LOCKTAB *lt; + DB_LOCKREGION *region; + u_int32_t locker_ndx; + int ret; + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, + dbenv->lk_handle, "DB_ENV->lock_id_free", DB_INIT_LOCK); + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + LOCKREGION(dbenv, lt); + LOCKER_LOCK(lt, region, id, locker_ndx); + if ((ret = + __lock_getlocker(lt, id, locker_ndx, 0, &sh_locker)) != 0) + goto err; + if (sh_locker == NULL) { + ret = EINVAL; + goto err; + } + + if (sh_locker->nlocks != 0) { + __db_err(dbenv, "Locker still has locks"); + ret = EINVAL; + goto err; + } + + __lock_freelocker(lt, region, sh_locker, locker_ndx); + +err: UNLOCKREGION(dbenv, lt); + return (ret); +} + +/* + * __lock_vec -- + * Vector lock routine. This function takes a set of operations + * and performs them all at once. In addition, lock_vec provides + * functionality for lock inheritance, releasing all locks for a + * given locker (used during transaction commit/abort), releasing + * all locks on a given object, and generating debugging information. 
+ * + * PUBLIC: int __lock_vec __P((DB_ENV *, + * PUBLIC: u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); + */ +int +__lock_vec(dbenv, locker, flags, list, nlist, elistp) DB_ENV *dbenv; u_int32_t locker, flags; int nlist; DB_LOCKREQ *list, **elistp; { struct __db_lock *lp, *next_lock; + DB_LOCK lock; DB_LOCKER *sh_locker, *sh_parent; DB_LOCKOBJ *obj, *sh_obj; DB_LOCKREGION *region; DB_LOCKTAB *lt; u_int32_t lndx, ndx; - int did_abort, i, ret, run_dd; + int did_abort, i, ret, run_dd, upgrade, writes; -#ifdef HAVE_RPC - if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) - return (__dbcl_lock_vec(dbenv, locker, - flags, list, nlist, elistp)); -#endif PANIC_CHECK(dbenv); - ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + ENV_REQUIRES_CONFIG(dbenv, + dbenv->lk_handle, "DB_ENV->lock_vec", DB_INIT_LOCK); + + /* Check if locks have been globally turned off. */ + if (F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); /* Validate arguments. */ - if ((ret = __db_fchk(dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0) + if ((ret = __db_fchk(dbenv, "DB_ENV->lock_vec", + flags, DB_LOCK_FREE_LOCKER | DB_LOCK_NOWAIT)) != 0) return (ret); lt = dbenv->lk_handle; @@ -138,13 +203,14 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); for (i = 0, ret = 0; i < nlist && ret == 0; i++) switch (list[i].op) { + case DB_LOCK_GET_TIMEOUT: + LF_SET(DB_LOCK_SET_TIMEOUT); case DB_LOCK_GET: ret = __lock_get_internal(dbenv->lk_handle, - locker, flags, - list[i].obj, list[i].mode, &list[i].lock); + locker, flags, list[i].obj, + list[i].mode, list[i].timeout, &list[i].lock); break; case DB_LOCK_INHERIT: - /* * Get the committing locker and mark it as deleted. 
* This allows us to traverse the locker links without @@ -159,7 +225,7 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) sh_locker == NULL || F_ISSET(sh_locker, DB_LOCKER_DELETED)) { if (ret == 0 && sh_locker != NULL) - ret = EACCES; + ret = EINVAL; __db_err(dbenv, __db_locker_invalid); break; } @@ -182,8 +248,8 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) if (F_ISSET(sh_parent, DB_LOCKER_DELETED)) { if (ret == 0) { __db_err(dbenv, - "Parent locker is not valid"); - ret = EACCES; + "Parent locker is not valid"); + ret = EINVAL; } break; } @@ -203,15 +269,21 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) LF_ISSET(DB_LOCK_NOWAITERS)); } + /* Transfer child counts to parent. */ + sh_parent->nlocks += sh_locker->nlocks; + sh_parent->nwrites += sh_locker->nwrites; + /* Now free the original locker. */ ret = __lock_checklocker(lt, - NULL, locker, DB_LOCK_IGNOREDEL, NULL); + NULL, locker, DB_LOCK_IGNOREDEL); break; case DB_LOCK_PUT: - ret = - __lock_put_nolock(dbenv, &list[i].lock, &run_dd, 0); + ret = __lock_put_nolock(dbenv, + &list[i].lock, &run_dd, flags); break; case DB_LOCK_PUT_ALL: + case DB_LOCK_PUT_READ: + case DB_LOCK_UPGRADE_WRITE: /* * Get the locker and mark it as deleted. This * allows us to traverse the locker links without @@ -232,23 +304,79 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) * to do. */ break; + upgrade = 0; + writes = 1; + if (list[i].op == DB_LOCK_PUT_READ) + writes = 0; + else if (list[i].op == DB_LOCK_UPGRADE_WRITE) { + if (F_ISSET(sh_locker, DB_LOCKER_DIRTY)) + upgrade = 1; + writes = 0; + } + F_SET(sh_locker, DB_LOCKER_DELETED); /* Now traverse the locks, releasing each one. 
*/ for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); - lp != NULL; - lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) { - SH_LIST_REMOVE(lp, locker_links, __db_lock); - sh_obj = - (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); - SHOBJECT_LOCK(lt, region, sh_obj, lndx); - ret = __lock_put_internal(lt, - lp, lndx, DB_LOCK_FREE | DB_LOCK_DOALL); - if (ret != 0) - break; + lp != NULL;) { + sh_obj = (DB_LOCKOBJ *) + ((u_int8_t *)lp + lp->obj); + if (writes == 1 || lp->mode == DB_LOCK_READ) { + SH_LIST_REMOVE(lp, + locker_links, __db_lock); + sh_obj = (DB_LOCKOBJ *) + ((u_int8_t *)lp + lp->obj); + SHOBJECT_LOCK(lt, region, sh_obj, lndx); + /* + * We are not letting lock_put_internal + * unlink the lock, so we'll have to + * update counts here. + */ + sh_locker->nlocks--; + if (IS_WRITELOCK(lp->mode)) + sh_locker->nwrites--; + ret = __lock_put_internal(lt, lp, + lndx, DB_LOCK_FREE | DB_LOCK_DOALL); + if (ret != 0) + break; + lp = SH_LIST_FIRST( + &sh_locker->heldby, __db_lock); + } else + lp = SH_LIST_NEXT(lp, + locker_links, __db_lock); + } + switch (list[i].op) { + case DB_LOCK_UPGRADE_WRITE: + if (upgrade != 1) + goto up_done; + for (lp = SH_LIST_FIRST( + &sh_locker->heldby, __db_lock); + lp != NULL; + lp = SH_LIST_NEXT(lp, + locker_links, __db_lock)) { + if (ret != 0) + break; + lock.off = R_OFFSET(&lt->reginfo, lp); + lock.gen = lp->gen; + F_SET(sh_locker, DB_LOCKER_INABORT); + ret = __lock_get_internal(lt, + locker, DB_LOCK_UPGRADE, + NULL, DB_LOCK_WRITE, 0, &lock); + } + up_done: + /* FALL THROUGH */ + case DB_LOCK_PUT_READ: + F_CLR(sh_locker, DB_LOCKER_DELETED); + break; + + case DB_LOCK_PUT_ALL: + if (ret == 0) + ret = __lock_checklocker(lt, + NULL, locker, DB_LOCK_IGNOREDEL); + break; + default: + break; } - ret = __lock_checklocker(lt, - NULL, locker, DB_LOCK_IGNOREDEL, NULL); break; case DB_LOCK_PUT_OBJ: /* Remove all the locks associated with an object. 
*/ @@ -269,8 +397,9 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock); ret == 0 && lp != NULL; lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) - ret = __lock_put_internal(lt, - lp, ndx, DB_LOCK_NOPROMOTE | DB_LOCK_DOALL); + ret = __lock_put_internal(lt, lp, ndx, + DB_LOCK_UNLINK | + DB_LOCK_NOPROMOTE | DB_LOCK_DOALL); /* * On the last time around, the object will get @@ -281,18 +410,43 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) ret == 0 && lp != NULL; lp = next_lock) { next_lock = SH_TAILQ_NEXT(lp, links, __db_lock); - ret = __lock_put_internal(lt, - lp, ndx, DB_LOCK_NOPROMOTE | DB_LOCK_DOALL); + ret = __lock_put_internal(lt, lp, ndx, + DB_LOCK_UNLINK | + DB_LOCK_NOPROMOTE | DB_LOCK_DOALL); } break; + + case DB_LOCK_TIMEOUT: + ret = __lock_set_timeout(dbenv, + locker, 0, DB_SET_TXN_NOW); + region->need_dd = 1; + break; + + case DB_LOCK_TRADE: + /* + * INTERNAL USE ONLY. + * Change the holder of the lock described in + * list[i].lock to the locker-id specified by + * the locker parameter. + */ + /* + * You had better know what you're doing here. + * We are trading locker-id's on a lock to + * facilitate file locking on open DB handles. + * We do not do any conflict checking on this, + * so heaven help you if you use this flag under + * any other circumstances. + */ + ret = __lock_trade(dbenv, &list[i].lock, locker); + break; #ifdef DEBUG case DB_LOCK_DUMP: /* Find the locker. 
*/ LOCKER_LOCK(lt, region, locker, ndx); if ((ret = __lock_getlocker(lt, - locker, ndx, 0, &sh_locker)) != 0 - || sh_locker == NULL - || F_ISSET(sh_locker, DB_LOCKER_DELETED)) + locker, ndx, 0, &sh_locker)) != 0 || + sh_locker == NULL || + F_ISSET(sh_locker, DB_LOCKER_DELETED)) break; for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); @@ -309,14 +463,12 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) break; } - if (ret == 0 && region->need_dd && region->detect != DB_LOCK_NORUN) { + if (ret == 0 && region->need_dd && region->detect != DB_LOCK_NORUN) run_dd = 1; - region->need_dd = 0; - } UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); if (run_dd) - (void)lock_detect(dbenv, 0, region->detect, &did_abort); + (void)dbenv->lock_detect(dbenv, 0, region->detect, &did_abort); if (ret != 0 && elistp != NULL) *elistp = &list[i - 1]; @@ -327,14 +479,17 @@ lock_vec(dbenv, locker, flags, list, nlist, elistp) /* * Lock acquisition routines. There are two library interfaces: * - * lock_get -- + * __lock_get -- * original lock get interface that takes a locker id. * * All the work for lock_get (and for the GET option of lock_vec) is done * inside of lock_get_internal. + * + * PUBLIC: int __lock_get __P((DB_ENV *, + * PUBLIC: u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); */ int -lock_get(dbenv, locker, flags, obj, lock_mode, lock) +__lock_get(dbenv, locker, flags, obj, lock_mode, lock) DB_ENV *dbenv; u_int32_t locker, flags; const DBT *obj; @@ -343,95 +498,103 @@ lock_get(dbenv, locker, flags, obj, lock_mode, lock) { int ret; -#ifdef HAVE_RPC - if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) - return (__dbcl_lock_get(dbenv, locker, - flags, obj, lock_mode, lock)); -#endif PANIC_CHECK(dbenv); - ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + ENV_REQUIRES_CONFIG(dbenv, + dbenv->lk_handle, "DB_ENV->lock_get", DB_INIT_LOCK); if (IS_RECOVERING(dbenv)) { - lock->off = LOCK_INVALID; + LOCK_INIT(*lock); return (0); } /* Validate arguments. 
*/ - if ((ret = __db_fchk(dbenv, - "lock_get", flags, + if ((ret = __db_fchk(dbenv, "DB_ENV->lock_get", flags, DB_LOCK_NOWAIT | DB_LOCK_UPGRADE | DB_LOCK_SWITCH)) != 0) return (ret); LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); ret = __lock_get_internal(dbenv->lk_handle, - locker, flags, obj, lock_mode, lock); + locker, flags, obj, lock_mode, 0, lock); UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); return (ret); } static int -__lock_get_internal(lt, locker, flags, obj, lock_mode, lock) +__lock_get_internal(lt, locker, flags, obj, lock_mode, timeout, lock) DB_LOCKTAB *lt; u_int32_t locker, flags; const DBT *obj; db_lockmode_t lock_mode; + db_timeout_t timeout; DB_LOCK *lock; { - struct __db_lock *newl, *lp; + struct __db_lock *newl, *lp, *wwrite; DB_ENV *dbenv; DB_LOCKER *sh_locker; DB_LOCKOBJ *sh_obj; DB_LOCKREGION *region; - u_int32_t locker_ndx; - int did_abort, freed, ihold, on_locker_list, no_dd, ret; + u_int32_t locker_ndx, obj_ndx; + int did_abort, ihold, on_locker_list, no_dd, ret; - no_dd = ret = 0; - on_locker_list = 0; - region = lt->reginfo.primary; dbenv = lt->dbenv; + region = lt->reginfo.primary; + on_locker_list = no_dd = ret = 0; + + /* Check if locks have been globally turned off. */ + if (F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); /* - * If we are not going to reuse this lock, initialize - * the offset to invalid so that if we fail it - * will not look like a valid lock. + * If we are not going to reuse this lock, initialize the offset to + * invalid so that if we fail it will not look like a valid lock. */ if (!LF_ISSET(DB_LOCK_UPGRADE | DB_LOCK_SWITCH)) - lock->off = LOCK_INVALID; + LOCK_INIT(*lock); - /* - * Check that the lock mode is valid. - */ - if ((u_int32_t)lock_mode >= region->nmodes) { - __db_err(dbenv, - "lock_get: invalid lock mode %lu\n", (u_long)lock_mode); + /* Check that the lock mode is valid. 
*/ + if ((u_int32_t)lock_mode >= region->stat.st_nmodes) { + __db_err(dbenv, "DB_ENV->lock_get: invalid lock mode %lu", + (u_long)lock_mode); return (EINVAL); } /* Allocate a new lock. Optimize for the common case of a grant. */ - region->nrequests++; + region->stat.st_nrequests++; if ((newl = SH_TAILQ_FIRST(&region->free_locks, __db_lock)) != NULL) SH_TAILQ_REMOVE(&region->free_locks, newl, links, __db_lock); if (newl == NULL) { __db_err(dbenv, __db_lock_err, "locks"); return (ENOMEM); } - if (++region->nlocks > region->maxnlocks) - region->maxnlocks = region->nlocks; + if (++region->stat.st_nlocks > region->stat.st_maxnlocks) + region->stat.st_maxnlocks = region->stat.st_nlocks; - /* Allocate a new object. */ - OBJECT_LOCK(lt, region, obj, lock->ndx); - if ((ret = __lock_getobj(lt, obj, lock->ndx, 1, &sh_obj)) != 0) - goto err; + if (obj == NULL) { + DB_ASSERT(LOCK_ISSET(*lock)); + lp = (struct __db_lock *)R_ADDR(&lt->reginfo, lock->off); + sh_obj = (DB_LOCKOBJ *) ((u_int8_t *)lp + lp->obj); + } else { + /* Allocate a shared memory new object. */ + OBJECT_LOCK(lt, region, obj, lock->ndx); + if ((ret = __lock_getobj(lt, obj, lock->ndx, 1, &sh_obj)) != 0) + goto err; + } /* Get the locker, we may need it to find our parent. */ LOCKER_LOCK(lt, region, locker, locker_ndx); - if ((ret = - __lock_getlocker(lt, locker, locker_ndx, 1, &sh_locker)) != 0) { + if ((ret = __lock_getlocker(lt, locker, + locker_ndx, locker > DB_LOCK_MAXID ? 1 : 0, &sh_locker)) != 0) { /* - * XXX: Margo - * CLEANUP the object and the lock. + * XXX We cannot tell if we created the object or not, + * so we don't kow if we should free it or not. 
*/ - return (ret); + goto err; + } + + if (sh_locker == NULL) { + __db_err(dbenv, "Locker does not exist"); + ret = EINVAL; + goto err; } /* @@ -460,11 +623,11 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lock) if (LF_ISSET(DB_LOCK_SWITCH)) goto put_lock; + wwrite = NULL; for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { - if (locker == lp->holder || - __lock_is_parent(lt, lp->holder, sh_locker)) { + if (locker == lp->holder) { if (lp->mode == lock_mode && lp->status == DB_LSTAT_HELD) { if (LF_ISSET(DB_LOCK_UPGRADE)) @@ -473,20 +636,46 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lock) /* * Lock is held, so we can increment the * reference count and return this lock. + * We do not count reference increments + * towards the locks held by the locker. */ lp->refcount++; lock->off = R_OFFSET(&lt->reginfo, lp); lock->gen = lp->gen; + lock->mode = lp->mode; ret = 0; goto done; - } else + } else { ihold = 1; - } else if (CONFLICTS(lt, region, lp->mode, lock_mode)) + if (lock_mode == DB_LOCK_WRITE && + lp->mode == DB_LOCK_WWRITE) + wwrite = lp; + } + } else if (__lock_is_parent(lt, lp->holder, sh_locker)) + ihold = 1; + else if (CONFLICTS(lt, region, lp->mode, lock_mode)) break; } /* + * If we are looking to upgrade a WWRITE to a WRITE lock + * and there were no conflicting locks then we can just + * upgrade this lock to the one we want. + */ + if (wwrite != NULL && lp == NULL) { + lp = wwrite; + lp->mode = lock_mode; + lp->refcount++; + lock->off = R_OFFSET(&lt->reginfo, lp); + lock->gen = lp->gen; + lock->mode = lp->mode; + + ret = 0; + goto done; + } + + /* * Make the new lock point to the new object, initialize fields. 
* * This lock is not linked in anywhere, so we can muck with it @@ -542,10 +731,10 @@ put_lock: SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links); } else { ret = DB_LOCK_NOTGRANTED; - if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL - && LOCKER_FREEABLE(sh_locker)) - __lock_freelocker( lt, region, sh_locker, locker_ndx); - region->nnowaits++; + if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL && + LF_ISSET(DB_LOCK_FREE_LOCKER)) + __lock_freelocker(lt, region, sh_locker, locker_ndx); + region->stat.st_nnowaits++; goto err; } @@ -556,9 +745,9 @@ llist: * detector, save that information. */ on_locker_list = 1; - no_dd = sh_locker->master_locker == INVALID_ROFF - && SH_LIST_FIRST(&sh_locker->child_locker, __db_locker) == NULL - && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL; + no_dd = sh_locker->master_locker == INVALID_ROFF && + SH_LIST_FIRST(&sh_locker->child_locker, __db_locker) == NULL && + SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL; SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock); @@ -573,9 +762,60 @@ llist: * block. */ newl->status = DB_LSTAT_WAITING; - region->nconflicts++; - if (region->detect == DB_LOCK_NORUN) - region->need_dd = 1; + region->stat.st_nconflicts++; + region->need_dd = 1; + /* + * First check to see if this txn has expired. + * If not then see if the lock timeout is past + * the expiration of the txn, if it is, use + * the txn expiration time. lk_expire is passed + * to avoid an extra call to get the time. + */ + if (__lock_expired(dbenv, + &sh_locker->lk_expire, &sh_locker->tx_expire)) { + newl->status = DB_LSTAT_ABORTED; + region->stat.st_ndeadlocks++; + region->stat.st_ntxntimeouts++; + + /* + * Remove the lock from the wait queue and if + * this was the only lock on the wait queue remove + * this object from the deadlock detector object + * list. 
+ */ + SH_LIST_REMOVE(newl, locker_links, __db_lock); + SH_TAILQ_REMOVE( + &sh_obj->waiters, newl, links, __db_lock); + if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) + SH_TAILQ_REMOVE(&region->dd_objs, + sh_obj, dd_links, __db_lockobj); + + /* Clear the timeout, we are done. */ + LOCK_SET_TIME_INVALID(&sh_locker->tx_expire); + goto expired; + } + + /* + * If a timeout was specified in this call then it + * takes priority. If a lock timeout has been specified + * for this transaction then use that, otherwise use + * the global timeout value. + */ + if (!LF_ISSET(DB_LOCK_SET_TIMEOUT)) { + if (F_ISSET(sh_locker, DB_LOCKER_TIMEOUT)) + timeout = sh_locker->lk_timeout; + else + timeout = region->lk_timeout; + } + if (timeout != 0) + __lock_expires(dbenv, &sh_locker->lk_expire, timeout); + else + LOCK_SET_TIME_INVALID(&sh_locker->lk_expire); + + if (LOCK_TIME_ISVALID(&sh_locker->tx_expire) && + (timeout == 0 || __lock_expired(dbenv, + &sh_locker->lk_expire, &sh_locker->tx_expire))) + sh_locker->lk_expire = sh_locker->tx_expire; UNLOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); /* @@ -583,22 +823,41 @@ llist: * detector should be run. */ if (region->detect != DB_LOCK_NORUN && !no_dd) - (void)lock_detect(dbenv, 0, region->detect, &did_abort); + (void)dbenv->lock_detect( + dbenv, 0, region->detect, &did_abort); - MUTEX_LOCK(dbenv, &newl->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &newl->mutex); LOCKREGION(dbenv, (DB_LOCKTAB *)dbenv->lk_handle); +expired: /* Turn off lock timeout. 
*/ + LOCK_SET_TIME_INVALID(&sh_locker->lk_expire); + if (newl->status != DB_LSTAT_PENDING) { - (void)__lock_checklocker(lt, - newl, newl->holder, 0, &freed); + (void)__lock_checklocker(lt, newl, newl->holder, 0); switch (newl->status) { case DB_LSTAT_ABORTED: on_locker_list = 0; ret = DB_LOCK_DEADLOCK; break; - case DB_LSTAT_NOGRANT: - ret = DB_LOCK_NOTGRANTED; + case DB_LSTAT_NOTEXIST: + ret = DB_LOCK_NOTEXIST; break; + case DB_LSTAT_EXPIRED: + SHOBJECT_LOCK(lt, + region, sh_obj, obj_ndx); + if ((ret = __lock_put_internal( + lt, newl, obj_ndx, 0) != 0)) + goto err; + if (LOCK_TIME_EQUAL( + &sh_locker->lk_expire, + &sh_locker->tx_expire)) { + region->stat.st_ndeadlocks++; + region->stat.st_ntxntimeouts++; + return (DB_LOCK_DEADLOCK); + } else { + region->stat.st_nlocktimeouts++; + return (DB_LOCK_NOTGRANTED); + } default: ret = EINVAL; break; @@ -624,6 +883,10 @@ llist: lock->off = R_OFFSET(&lt->reginfo, newl); lock->gen = newl->gen; + lock->mode = newl->mode; + sh_locker->nlocks++; + if (IS_WRITELOCK(newl->mode)) + sh_locker->nwrites++; return (0); @@ -631,18 +894,21 @@ upgrade:/* * This was an upgrade, so return the new lock to the free list and * upgrade the mode of the original lock. */ - ((struct __db_lock *)R_ADDR(&lt->reginfo, lock->off))->mode = lock_mode; + lp = (struct __db_lock *)R_ADDR(&lt->reginfo, lock->off); + if (IS_WRITELOCK(lock_mode) && !IS_WRITELOCK(lp->mode)) + sh_locker->nwrites++; + lp->mode = lock_mode; ret = 0; /* FALLTHROUGH */ done: err: newl->status = DB_LSTAT_FREE; + region->stat.st_nlocks--; if (on_locker_list) { SH_LIST_REMOVE(newl, locker_links, __db_lock); } SH_TAILQ_INSERT_HEAD(&region->free_locks, newl, links, __db_lock); - region->nlocks--; return (ret); } @@ -651,21 +917,20 @@ err: newl->status = DB_LSTAT_FREE; * * The user callable one is lock_put and the three we use internally are * __lock_put_nolock, __lock_put_internal and __lock_downgrade. 
+ * + * PUBLIC: int __lock_put __P((DB_ENV *, DB_LOCK *)); */ int -lock_put(dbenv, lock) +__lock_put(dbenv, lock) DB_ENV *dbenv; DB_LOCK *lock; { DB_LOCKTAB *lt; int ret, run_dd; -#ifdef HAVE_RPC - if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) - return (__dbcl_lock_put(dbenv, lock)); -#endif PANIC_CHECK(dbenv); - ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + ENV_REQUIRES_CONFIG(dbenv, + dbenv->lk_handle, "DB_LOCK->lock_put", DB_INIT_LOCK); if (IS_RECOVERING(dbenv)) return (0); @@ -676,8 +941,14 @@ lock_put(dbenv, lock) ret = __lock_put_nolock(dbenv, lock, &run_dd, 0); UNLOCKREGION(dbenv, lt); + /* + * Only run the lock detector if put told us to AND we are running + * in auto-detect mode. If we are not running in auto-detect, then + * a call to lock_detect here will 0 the need_dd bit, but will not + * actually abort anything. + */ if (ret == 0 && run_dd) - (void)lock_detect(dbenv, 0, + (void)dbenv->lock_detect(dbenv, 0, ((DB_LOCKREGION *)lt->reginfo.primary)->detect, NULL); return (ret); } @@ -687,41 +958,43 @@ __lock_put_nolock(dbenv, lock, runp, flags) DB_ENV *dbenv; DB_LOCK *lock; int *runp; - int flags; + u_int32_t flags; { struct __db_lock *lockp; DB_LOCKREGION *region; DB_LOCKTAB *lt; - u_int32_t locker; int ret; + /* Check if locks have been globally turned off. 
*/ + if (F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + lt = dbenv->lk_handle; region = lt->reginfo.primary; lockp = (struct __db_lock *)R_ADDR(&lt->reginfo, lock->off); - lock->off = LOCK_INVALID; + LOCK_INIT(*lock); if (lock->gen != lockp->gen) { - __db_err(dbenv, __db_lock_invalid, "lock_put"); - return (EACCES); + __db_err(dbenv, __db_lock_invalid, "DB_LOCK->lock_put"); + return (EINVAL); } - locker = lockp->holder; ret = __lock_put_internal(lt, lockp, lock->ndx, flags | DB_LOCK_UNLINK | DB_LOCK_FREE); *runp = 0; - if (ret == 0 && region->need_dd && region->detect != DB_LOCK_NORUN) { + if (ret == 0 && region->need_dd && region->detect != DB_LOCK_NORUN) *runp = 1; - region->need_dd = 0; - } return (ret); } /* * __lock_downgrade -- - * Used by the concurrent access product to downgrade write locks - * back to iwrite locks. + * Used to downgrade locks. Currently this is used in two places, + * 1) by the concurrent access product to downgrade write locks + * back to iwrite locks and 2) to downgrade write-handle locks to read-handle + * locks at the end of an open/create. * * PUBLIC: int __lock_downgrade __P((DB_ENV *, * PUBLIC: DB_LOCK *, db_lockmode_t, u_int32_t)); @@ -734,14 +1007,21 @@ __lock_downgrade(dbenv, lock, new_mode, flags) u_int32_t flags; { struct __db_lock *lockp; + DB_LOCKER *sh_locker; DB_LOCKOBJ *obj; DB_LOCKREGION *region; DB_LOCKTAB *lt; + u_int32_t indx; int ret; COMPQUIET(flags, 0); PANIC_CHECK(dbenv); + ret = 0; + + /* Check if locks have been globally turned off. 
*/ + if (F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); lt = dbenv->lk_handle; region = lt->reginfo.primary; @@ -751,9 +1031,24 @@ __lock_downgrade(dbenv, lock, new_mode, flags) lockp = (struct __db_lock *)R_ADDR(&lt->reginfo, lock->off); if (lock->gen != lockp->gen) { __db_err(dbenv, __db_lock_invalid, "lock_downgrade"); - ret = EACCES; + ret = EINVAL; + goto out; + } + + LOCKER_LOCK(lt, region, lockp->holder, indx); + + if ((ret = __lock_getlocker(lt, lockp->holder, + indx, 0, &sh_locker)) != 0 || sh_locker == NULL) { + if (ret == 0) + ret = EINVAL; + __db_err(dbenv, __db_locker_invalid); goto out; } + if (IS_WRITELOCK(lockp->mode) && !IS_WRITELOCK(new_mode)) + sh_locker->nwrites--; + + if (new_mode == DB_LOCK_WWRITE) + F_SET(sh_locker, DB_LOCKER_DIRTY); lockp->mode = new_mode; @@ -761,25 +1056,23 @@ __lock_downgrade(dbenv, lock, new_mode, flags) obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); (void)__lock_promote(lt, obj, LF_ISSET(DB_LOCK_NOWAITERS)); - ++region->nreleases; out: UNLOCKREGION(dbenv, lt); - return (0); + return (ret); } static int __lock_put_internal(lt, lockp, obj_ndx, flags) DB_LOCKTAB *lt; struct __db_lock *lockp; - u_int32_t obj_ndx; - u_int32_t flags; + u_int32_t obj_ndx, flags; { DB_LOCKOBJ *sh_obj; DB_LOCKREGION *region; - int no_reclaim, ret, state_changed; + int ret, state_changed; region = lt->reginfo.primary; - no_reclaim = ret = state_changed = 0; + ret = state_changed = 0; if (!OBJ_LINKS_VALID(lockp)) { /* @@ -791,14 +1084,14 @@ __lock_put_internal(lt, lockp, obj_ndx, flags) lockp->status = DB_LSTAT_FREE; SH_TAILQ_INSERT_HEAD( &region->free_locks, lockp, links, __db_lock); - region->nlocks--; + region->stat.st_nlocks--; return (0); } if (LF_ISSET(DB_LOCK_DOALL)) - region->nreleases += lockp->refcount; + region->stat.st_nreleases += lockp->refcount; else - region->nreleases++; + region->stat.st_nreleases++; if (!LF_ISSET(DB_LOCK_DOALL) && lockp->refcount > 1) { lockp->refcount--; @@ -812,8 +1105,8 @@ __lock_put_internal(lt, lockp, 
obj_ndx, flags) sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); /* Remove this lock from its holders/waitlist. */ - if (lockp->status != DB_LSTAT_HELD) - __lock_remove_waiter(lt->dbenv, sh_obj, lockp, DB_LSTAT_FREE); + if (lockp->status != DB_LSTAT_HELD && lockp->status != DB_LSTAT_PENDING) + __lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE); else { SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock); lockp->links.stqe_prev = -1; @@ -822,15 +1115,15 @@ __lock_put_internal(lt, lockp, obj_ndx, flags) if (LF_ISSET(DB_LOCK_NOPROMOTE)) state_changed = 0; else - state_changed = - __lock_promote(lt, sh_obj, LF_ISSET(DB_LOCK_NOWAITERS)); + state_changed = __lock_promote(lt, + sh_obj, LF_ISSET(DB_LOCK_REMOVE | DB_LOCK_NOWAITERS)); if (LF_ISSET(DB_LOCK_UNLINK)) - ret = __lock_checklocker(lt, lockp, lockp->holder, flags, NULL); + ret = __lock_checklocker(lt, lockp, lockp->holder, flags); /* Check if object should be reclaimed. */ - if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL - && SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) { + if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL && + SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) { HASHREMOVE_EL(lt->obj_tab, obj_ndx, __db_lockobj, links, sh_obj); if (sh_obj->lockobj.size > sizeof(sh_obj->objdata)) @@ -838,7 +1131,7 @@ __lock_put_internal(lt, lockp, obj_ndx, flags) SH_DBT_PTR(&sh_obj->lockobj)); SH_TAILQ_INSERT_HEAD( &region->free_objs, sh_obj, links, __db_lockobj); - region->nobjects--; + region->stat.st_nobjects--; state_changed = 1; } @@ -847,7 +1140,7 @@ __lock_put_internal(lt, lockp, obj_ndx, flags) lockp->status = DB_LSTAT_FREE; SH_TAILQ_INSERT_HEAD( &region->free_locks, lockp, links, __db_lock); - region->nlocks--; + region->stat.st_nlocks--; } /* @@ -872,11 +1165,10 @@ __lock_put_internal(lt, lockp, obj_ndx, flags) * Must be called without the locker's lock set. 
*/ static int -__lock_checklocker(lt, lockp, locker, flags, freed) +__lock_checklocker(lt, lockp, locker, flags) DB_LOCKTAB *lt; struct __db_lock *lockp; u_int32_t locker, flags; - int *freed; { DB_ENV *dbenv; DB_LOCKER *sh_locker; @@ -888,17 +1180,14 @@ __lock_checklocker(lt, lockp, locker, flags, freed) region = lt->reginfo.primary; ret = 0; - if (freed != NULL) - *freed = 0; - LOCKER_LOCK(lt, region, locker, indx); /* If the locker's list is NULL, free up the locker. */ if ((ret = __lock_getlocker(lt, locker, indx, 0, &sh_locker)) != 0 || sh_locker == NULL) { if (ret == 0) - ret = EACCES; - __db_err(lt->dbenv, __db_locker_invalid); + ret = EINVAL; + __db_err(dbenv, __db_locker_invalid); goto freelock; } @@ -908,22 +1197,25 @@ __lock_checklocker(lt, lockp, locker, flags, freed) goto freelock; } - if (LF_ISSET(DB_LOCK_UNLINK)) + if (LF_ISSET(DB_LOCK_UNLINK)) { SH_LIST_REMOVE(lockp, locker_links, __db_lock); + if (lockp->status == DB_LSTAT_HELD) { + sh_locker->nlocks--; + if (IS_WRITELOCK(lockp->mode)) + sh_locker->nwrites--; + } + } - if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL - && LOCKER_FREEABLE(sh_locker)) { + if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL && + LF_ISSET(DB_LOCK_FREE_LOCKER)) __lock_freelocker( lt, region, sh_locker, indx); - if (freed != NULL) - *freed = 1; - } freelock: if (LF_ISSET(DB_LOCK_FREE)) { lockp->status = DB_LSTAT_FREE; SH_TAILQ_INSERT_HEAD( &region->free_locks, lockp, links, __db_lock); - region->nlocks--; + region->stat.st_nlocks--; } return (ret); @@ -1019,11 +1311,9 @@ __lock_freefamilylocker(lt, locker) LOCKER_LOCK(lt, region, locker, indx); if ((ret = __lock_getlocker(lt, - locker, indx, 0, &sh_locker)) != 0 || sh_locker == NULL) { - if (ret == 0) - ret = EACCES; + locker, indx, 0, &sh_locker)) != 0 || sh_locker == NULL) goto freelock; - } + if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) != NULL) { ret = EINVAL; __db_err(dbenv, "Freeing locker with locks"); @@ -1046,11 +1336,8 @@ freelock: * common code 
for deleting a locker. * * This must be called with the locker bucket locked. - * - * PUBLIC: void __lock_freelocker __P((DB_LOCKTAB *, - * PUBLIC: DB_LOCKREGION *, DB_LOCKER *, u_int32_t)); */ -void +static void __lock_freelocker(lt, region, sh_locker, indx) DB_LOCKTAB *lt; DB_LOCKREGION *region; @@ -1062,7 +1349,123 @@ __lock_freelocker(lt, region, sh_locker, indx) lt->locker_tab, indx, __db_locker, links, sh_locker); SH_TAILQ_INSERT_HEAD( &region->free_lockers, sh_locker, links, __db_locker); - region->nlockers--; + SH_TAILQ_REMOVE(&region->lockers, sh_locker, ulinks, __db_locker); + region->stat.st_nlockers--; +} + +/* + * __lock_set_timeout + * -- set timeout values in shared memory. + * This is called from the transaction system. + * We either set the time that this transaction expires or the + * amount of time that a lock for this transaction is permitted + * to wait. + * + * PUBLIC: int __lock_set_timeout __P(( DB_ENV *, + * PUBLIC: u_int32_t, db_timeout_t, u_int32_t)); + */ +int +__lock_set_timeout(dbenv, locker, timeout, op) + DB_ENV *dbenv; + u_int32_t locker; + db_timeout_t timeout; + u_int32_t op; +{ + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + u_int32_t locker_ndx; + int ret; + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + LOCKREGION(dbenv, lt); + + LOCKER_LOCK(lt, region, locker, locker_ndx); + ret = __lock_getlocker(lt, locker, locker_ndx, 1, &sh_locker); + UNLOCKREGION(dbenv, lt); + if (ret != 0) + return (ret); + + if (op == DB_SET_TXN_TIMEOUT) { + if (timeout == 0) + LOCK_SET_TIME_INVALID(&sh_locker->tx_expire); + else + __lock_expires(dbenv, &sh_locker->tx_expire, timeout); + } else if (op == DB_SET_LOCK_TIMEOUT) { + sh_locker->lk_timeout = timeout; + F_SET(sh_locker, DB_LOCKER_TIMEOUT); + } else if (op == DB_SET_TXN_NOW) { + LOCK_SET_TIME_INVALID(&sh_locker->tx_expire); + __lock_expires(dbenv, &sh_locker->tx_expire, 0); + sh_locker->lk_expire = sh_locker->tx_expire; + } else + return (EINVAL); + + return (0); +} + 
+/* + * __lock_inherit_timeout + * -- inherit timeout values from parent locker. + * This is called from the transaction system. This will + * return EINVAL if the parent does not exist or did not + * have a current txn timeout set. + * + * PUBLIC: int __lock_inherit_timeout __P(( DB_ENV *, u_int32_t, u_int32_t)); + */ +int +__lock_inherit_timeout(dbenv, parent, locker) + DB_ENV *dbenv; + u_int32_t parent, locker; +{ + DB_LOCKER *parent_locker, *sh_locker; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + u_int32_t locker_ndx; + int ret; + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + ret = 0; + LOCKREGION(dbenv, lt); + + /* If the parent does not exist, we are done. */ + LOCKER_LOCK(lt, region, parent, locker_ndx); + if ((ret = __lock_getlocker(lt, + parent, locker_ndx, 0, &parent_locker)) != 0) + goto err; + + /* + * If the parent is not there yet, that's ok. If it + * does not have any timeouts set, then avoid creating + * the child locker at this point. + */ + if (parent_locker == NULL || + (LOCK_TIME_ISVALID(&parent_locker->tx_expire) && + !F_ISSET(parent_locker, DB_LOCKER_TIMEOUT))) { + ret = EINVAL; + goto done; + } + + LOCKER_LOCK(lt, region, locker, locker_ndx); + if ((ret = __lock_getlocker(lt, + locker, locker_ndx, 1, &sh_locker)) != 0) + goto err; + + sh_locker->tx_expire = parent_locker->tx_expire; + + if (F_ISSET(parent_locker, DB_LOCKER_TIMEOUT)) { + sh_locker->lk_timeout = parent_locker->lk_timeout; + F_SET(sh_locker, DB_LOCKER_TIMEOUT); + if (!LOCK_TIME_ISVALID(&parent_locker->tx_expire)) + ret = EINVAL; + } + +done: +err: + UNLOCKREGION(dbenv, lt); + return (ret); } /* @@ -1101,13 +1504,13 @@ __lock_getlocker(lt, locker, indx, create, retp) /* Create new locker and then insert it into hash table. 
*/ if ((sh_locker = SH_TAILQ_FIRST( &region->free_lockers, __db_locker)) == NULL) { - __db_err(lt->dbenv, __db_lock_err, "locker entries"); + __db_err(dbenv, __db_lock_err, "locker entries"); return (ENOMEM); } SH_TAILQ_REMOVE( &region->free_lockers, sh_locker, links, __db_locker); - if (++region->nlockers > region->maxnlockers) - region->maxnlockers = region->nlockers; + if (++region->stat.st_nlockers > region->stat.st_maxnlockers) + region->stat.st_maxnlockers = region->stat.st_nlockers; sh_locker->id = locker; sh_locker->dd_id = 0; @@ -1116,8 +1519,18 @@ __lock_getlocker(lt, locker, indx, create, retp) SH_LIST_INIT(&sh_locker->child_locker); sh_locker->flags = 0; SH_LIST_INIT(&sh_locker->heldby); + sh_locker->nlocks = 0; + sh_locker->nwrites = 0; + sh_locker->lk_timeout = 0; + LOCK_SET_TIME_INVALID(&sh_locker->tx_expire); + if (locker < TXN_MINIMUM && region->tx_timeout != 0) + __lock_expires(dbenv, + &sh_locker->tx_expire, region->tx_timeout); + LOCK_SET_TIME_INVALID(&sh_locker->lk_expire); HASHINSERT(lt->locker_tab, indx, __db_locker, links, sh_locker); + SH_TAILQ_INSERT_HEAD(&region->lockers, + sh_locker, ulinks, __db_locker); } *retp = sh_locker; @@ -1131,11 +1544,8 @@ __lock_getlocker(lt, locker, indx, create, retp) * the table. * * This must be called with the object bucket locked. 
- * - * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *, - * PUBLIC: const DBT *, u_int32_t, int, DB_LOCKOBJ **)); */ -int +static int __lock_getobj(lt, obj, ndx, create, retp) DB_LOCKTAB *lt; const DBT *obj; @@ -1185,8 +1595,8 @@ __lock_getobj(lt, obj, ndx, create, retp) SH_TAILQ_REMOVE( &region->free_objs, sh_obj, links, __db_lockobj); - if (++region->nobjects > region->maxnobjects) - region->maxnobjects = region->nobjects; + if (++region->stat.st_nobjects > region->stat.st_maxnobjects) + region->stat.st_maxnobjects = region->stat.st_nobjects; SH_TAILQ_INIT(&sh_obj->waiters); SH_TAILQ_INIT(&sh_obj->holders); @@ -1220,7 +1630,7 @@ __lock_is_parent(lt, locker, sh_locker) parent = sh_locker; while (parent->parent_locker != INVALID_ROFF) { parent = (DB_LOCKER *) - R_ADDR(&lt->reginfo, parent->parent_locker); + R_ADDR(&lt->reginfo, parent->parent_locker); if (parent->id == locker) return (1); } @@ -1234,13 +1644,13 @@ __lock_is_parent(lt, locker, sh_locker) * Look through the waiters and holders lists and decide which (if any) * locks can be promoted. Promote any that are eligible. * - * PUBLIC: int __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *, int)); + * PUBLIC: int __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *, u_int32_t)); */ int -__lock_promote(lt, obj, not_waiters) +__lock_promote(lt, obj, flags) DB_LOCKTAB *lt; DB_LOCKOBJ *obj; - int not_waiters; + u_int32_t flags; { struct __db_lock *lp_w, *lp_h, *next_waiter; DB_LOCKER *sh_locker; @@ -1270,16 +1680,25 @@ __lock_promote(lt, obj, not_waiters) lp_w = next_waiter) { had_waiters = 1; next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock); + + /* Waiter may have aborted or expired. */ + if (lp_w->status != DB_LSTAT_WAITING) + continue; /* Are we switching locks? 
*/ - if (not_waiters && lp_w->mode == DB_LOCK_WAIT) + if (LF_ISSET(DB_LOCK_NOWAITERS) && lp_w->mode == DB_LOCK_WAIT) continue; + + if (LF_ISSET(DB_LOCK_REMOVE)) { + __lock_remove_waiter(lt, obj, lp_w, DB_LSTAT_NOTEXIST); + continue; + } for (lp_h = SH_TAILQ_FIRST(&obj->holders, __db_lock); lp_h != NULL; lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) { if (lp_h->holder != lp_w->holder && CONFLICTS(lt, region, lp_h->mode, lp_w->mode)) { - - LOCKER_LOCK(lt, region, lp_w->holder, locker_ndx); + LOCKER_LOCK(lt, + region, lp_w->holder, locker_ndx); if ((__lock_getlocker(lt, lp_w->holder, locker_ndx, 0, &sh_locker)) != 0) { DB_ASSERT(0); @@ -1323,19 +1742,26 @@ __lock_promote(lt, obj, not_waiters) * This must be called with the Object bucket locked. */ static void -__lock_remove_waiter(dbenv, sh_obj, lockp, status) - DB_ENV *dbenv; +__lock_remove_waiter(lt, sh_obj, lockp, status) + DB_LOCKTAB *lt; DB_LOCKOBJ *sh_obj; struct __db_lock *lockp; db_status_t status; { + DB_LOCKREGION *region; int do_wakeup; + region = lt->reginfo.primary; + do_wakeup = lockp->status == DB_LSTAT_WAITING; SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock); lockp->links.stqe_prev = -1; lockp->status = status; + if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) + SH_TAILQ_REMOVE( + &region->dd_objs, + sh_obj, dd_links, __db_lockobj); /* * Wake whoever is waiting on this lock. @@ -1344,96 +1770,105 @@ __lock_remove_waiter(dbenv, sh_obj, lockp, status) * keep the compiler quiet. */ if (do_wakeup) - MUTEX_UNLOCK(dbenv, &lockp->mutex); + MUTEX_UNLOCK(lt->dbenv, &lockp->mutex); } /* - * __lock_printlock -- - * - * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int)); + * __lock_expires -- set the expire time given the time to live. + * We assume that if timevalp is set then it contains "now". + * This avoids repeated system calls to get the time. 
*/ -void -__lock_printlock(lt, lp, ispgno) - DB_LOCKTAB *lt; - struct __db_lock *lp; - int ispgno; +static void +__lock_expires(dbenv, timevalp, timeout) + DB_ENV *dbenv; + db_timeval_t *timevalp; + db_timeout_t timeout; { - DB_LOCKOBJ *lockobj; - db_pgno_t pgno; - u_int32_t *fidp; - u_int8_t *ptr, type; - const char *mode, *status; - - switch (lp->mode) { - case DB_LOCK_IREAD: - mode = "IREAD"; - break; - case DB_LOCK_IWR: - mode = "IWR"; - break; - case DB_LOCK_IWRITE: - mode = "IWRITE"; - break; - case DB_LOCK_NG: - mode = "NG"; - break; - case DB_LOCK_READ: - mode = "READ"; - break; - case DB_LOCK_WRITE: - mode = "WRITE"; - break; - case DB_LOCK_WAIT: - mode = "WAIT"; - break; - default: - mode = "UNKNOWN"; - break; - } - switch (lp->status) { - case DB_LSTAT_ABORTED: - status = "ABORT"; - break; - case DB_LSTAT_ERR: - status = "ERROR"; - break; - case DB_LSTAT_FREE: - status = "FREE"; - break; - case DB_LSTAT_HELD: - status = "HELD"; - break; - case DB_LSTAT_NOGRANT: - status = "NONE"; - break; - case DB_LSTAT_WAITING: - status = "WAIT"; - break; - case DB_LSTAT_PENDING: - status = "PENDING"; - break; - default: - status = "UNKNOWN"; - break; + if (!LOCK_TIME_ISVALID(timevalp)) + __os_clock(dbenv, &timevalp->tv_sec, &timevalp->tv_usec); + if (timeout > 1000000) { + timevalp->tv_sec += timeout / 1000000; + timevalp->tv_usec += timeout % 1000000; + } else + timevalp->tv_usec += timeout; + + if (timevalp->tv_usec > 1000000) { + timevalp->tv_sec++; + timevalp->tv_usec -= 1000000; } - printf("\t%lx\t%s\t%lu\t%s\t", - (u_long)lp->holder, mode, (u_long)lp->refcount, status); - - lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); - ptr = SH_DBT_PTR(&lockobj->lockobj); - if (ispgno && lockobj->lockobj.size == sizeof(struct __db_ilock)) { - /* Assume this is a DBT lock. 
*/ - memcpy(&pgno, ptr, sizeof(db_pgno_t)); - fidp = (u_int32_t *)(ptr + sizeof(db_pgno_t)); - type = *(u_int8_t *)(ptr + sizeof(db_pgno_t) + DB_FILE_ID_LEN); - printf("%s %lu (%lu %lu %lu %lu %lu)\n", - type == DB_PAGE_LOCK ? "page" : "record", - (u_long)pgno, - (u_long)fidp[0], (u_long)fidp[1], (u_long)fidp[2], - (u_long)fidp[3], (u_long)fidp[4]); - } else { - printf("0x%lx ", (u_long)R_OFFSET(&lt->reginfo, lockobj)); - __db_pr(ptr, lockobj->lockobj.size); - printf("\n"); +} + +/* + * __lock_expired -- determine if a lock has expired. + * + * PUBLIC: int __lock_expired __P((DB_ENV *, db_timeval_t *, db_timeval_t *)); + */ +int +__lock_expired(dbenv, now, timevalp) + DB_ENV *dbenv; + db_timeval_t *now, *timevalp; +{ + if (!LOCK_TIME_ISVALID(timevalp)) + return (0); + + if (!LOCK_TIME_ISVALID(now)) + __os_clock(dbenv, &now->tv_sec, &now->tv_usec); + + return (now->tv_sec > timevalp->tv_sec || + (now->tv_sec == timevalp->tv_sec && + now->tv_usec >= timevalp->tv_usec)); +} + +/* + * __lock_trade -- + * + * Trade locker ids on a lock. This is used to reassign file locks from + * a transactional locker id to a long-lived locker id. This should be + * called with the region mutex held. + */ +static int +__lock_trade(dbenv, lock, new_locker) + DB_ENV *dbenv; + DB_LOCK *lock; + u_int32_t new_locker; +{ + struct __db_lock *lp; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_LOCKER *sh_locker; + int ret; + u_int32_t locker_ndx; + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + lp = (struct __db_lock *)R_ADDR(&lt->reginfo, lock->off); + + /* If the lock is already released, simply return. */ + if (lp->gen != lock->gen) + return (DB_NOTFOUND); + + /* Make sure that we can get new locker and add this lock to it. 
*/ + LOCKER_LOCK(lt, region, new_locker, locker_ndx); + if ((ret = + __lock_getlocker(lt, new_locker, locker_ndx, 0, &sh_locker)) != 0) + return (ret); + + if (sh_locker == NULL) { + __db_err(dbenv, "Locker does not exist"); + return (EINVAL); } + + /* Remove the lock from its current locker. */ + if ((ret = __lock_checklocker(lt, lp, lp->holder, DB_LOCK_UNLINK)) != 0) + return (ret); + + /* Add lock to its new locker. */ + SH_LIST_INSERT_HEAD(&sh_locker->heldby, lp, locker_links, __db_lock); + sh_locker->nlocks++; + if (IS_WRITELOCK(lp->mode)) + sh_locker->nwrites++; + lp->holder = new_locker; + + return (0); } |