diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2015-04-27 17:17:31 +1000 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-04-27 17:41:21 +1000 |
commit | 7ca9da48efb20d8e852f8393f2714d8051b139d3 (patch) | |
tree | 54624a9cebd2487cc184b7f91d71301d1380b272 | |
parent | 26fd30aa13358ebec87ffd3d449fba11a2d282c1 (diff) | |
download | mongo-7ca9da48efb20d8e852f8393f2714d8051b139d3.tar.gz |
Merge pull request #1894 from wiredtiger/tree-discard-background
Discard trees from cache in the background
Conflicts:
src/conn/conn_dhandle.c
-rw-r--r-- | src/conn/conn_dhandle.c | 112 | ||||
-rw-r--r-- | src/conn/conn_sweep.c | 228 | ||||
-rw-r--r-- | src/evict/evict_lru.c | 13 | ||||
-rw-r--r-- | src/include/dhandle.h | 13 | ||||
-rw-r--r-- | src/include/extern.h | 6 | ||||
-rw-r--r-- | src/lsm/lsm_work_unit.c | 2 | ||||
-rw-r--r-- | src/meta/meta_table.c | 2 | ||||
-rw-r--r-- | src/session/session_dhandle.c | 75 | ||||
-rw-r--r-- | src/txn/txn_ckpt.c | 6 |
9 files changed, 294 insertions, 163 deletions
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 0eb3389de34..65b9af1aa01 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -70,28 +70,23 @@ __conn_dhandle_open_lock( return (EBUSY); /* - * If the handle is open, try to get a read lock and recheck. + * If the handle is open, get a read lock and recheck. * - * Try to get a read lock if we want exclusive access and failed + * Wait for a read lock if we want exclusive access and failed * to get it: the sweep server may be closing this handle, and - * we need to wait for it to complete. If we want exclusive - * access and find the handle open once we get the read lock, - * give up: some other thread has it locked for real. + * we need to wait for it to release its lock. If we want + * exclusive access and find the handle open once we get the + * read lock, give up: some other thread has it locked for real. */ if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && (!want_exclusive || lock_busy)) { - ret = __wt_try_readlock(session, dhandle->rwlock); - if (ret == EBUSY) { - if (want_exclusive && - F_ISSET(dhandle, WT_DHANDLE_OPEN)) - return (ret); - __wt_yield(); - continue; - } - WT_RET(ret); + WT_RET(__wt_readlock(session, dhandle->rwlock)); is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN) ? 1 : 0; - if (is_open && !want_exclusive) + if (is_open && !want_exclusive) { + WT_ASSERT(session, + !F_ISSET(dhandle, WT_DHANDLE_DEAD)); return (0); + } WT_RET(__wt_readunlock(session, dhandle->rwlock)); } else is_open = 0; @@ -117,6 +112,7 @@ __conn_dhandle_open_lock( /* We have an exclusive lock, we're done. */ F_SET(dhandle, WT_DHANDLE_EXCLUSIVE); + WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD)); return (0); } else if (ret != EBUSY || (is_open && want_exclusive)) return (ret); @@ -149,13 +145,26 @@ __wt_conn_dhandle_find(WT_SESSION_IMPL *session, /* Increment the reference count if we already have the btree open. */ bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; - SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) - if (strcmp(name, dhandle->name) == 0 && - ((ckpt == NULL && dhandle->checkpoint == NULL) || - (ckpt != NULL && dhandle->checkpoint != NULL && - strcmp(ckpt, dhandle->checkpoint) == 0))) { - session->dhandle = dhandle; - return (0); + if (ckpt == NULL) { + SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) { + if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) + continue; + if (dhandle->checkpoint == NULL && + strcmp(name, dhandle->name) == 0) { + session->dhandle = dhandle; + return (0); + } + } + } else + SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) { + if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) + continue; + if (dhandle->checkpoint != NULL && + strcmp(name, dhandle->name) == 0 && + strcmp(ckpt, dhandle->checkpoint) == 0) { + session->dhandle = dhandle; + return (0); + } } return (WT_NOTFOUND); @@ -238,6 +247,30 @@ err: WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock)); } /* + * __conn_dhandle_mark_dead -- + * Mark a data handle dead. + */ +static int +__conn_dhandle_mark_dead(WT_SESSION_IMPL *session) +{ + int evict_reset; + + WT_ASSERT(session, F_ISSET(session, WT_SESSION_HANDLE_LIST_LOCKED)); + + /* + * Handle forced discard (e.g., when dropping a file). + * + * We need exclusive access to the file -- disable ordinary + * eviction and drain any blocks already queued. + */ + WT_RET(__wt_evict_file_exclusive_on(session, &evict_reset)); + F_SET(session->dhandle, WT_DHANDLE_DEAD); + if (evict_reset) + __wt_evict_file_exclusive_off(session); + return (0); +} + +/* * __wt_conn_btree_sync_and_close -- * Sync and close the underlying btree handle. */ @@ -278,16 +311,25 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force) /* * The close can fail if an update cannot be written, return the EBUSY * error to our caller for eventual retry. + * + * If we are forcing the close, just mark the handle dead and the tree + * will be discarded later. Don't do this for memory-mapped trees: we + * have to close the file handle to allow the file to be removed, but + * memory mapped trees contain pointers into memory that will become + * invalid if the mapping is closed. */ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) - WT_ERR(__wt_checkpoint_close(session, final, force)); - - if (dhandle->checkpoint == NULL) - --S2C(session)->open_btree_count; + WT_ERR(force && (btree->bm == NULL || btree->bm->map == NULL) ? + __conn_dhandle_mark_dead(session) : + __wt_checkpoint_close(session, final)); WT_TRET(__wt_btree_close(session)); - F_CLR(dhandle, WT_DHANDLE_OPEN); + if (!force || final) { + F_CLR(dhandle, WT_DHANDLE_OPEN); + if (dhandle->checkpoint == NULL) + --S2C(session)->open_btree_count; + } F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); err: __wt_spin_unlock(session, &dhandle->close_lock); @@ -529,6 +571,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE; SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && + !F_ISSET(dhandle, WT_DHANDLE_DEAD) && strcmp(uri, dhandle->name) == 0 && (apply_checkpoints || dhandle->checkpoint == NULL)) WT_RET(__conn_btree_apply_internal( @@ -536,6 +579,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, } else { SLIST_FOREACH(dhandle, &conn->dhlh, l) if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && + !F_ISSET(dhandle, WT_DHANDLE_DEAD) && (apply_checkpoints || dhandle->checkpoint == NULL) && WT_PREFIX_MATCH(dhandle->name, "file:") && @@ -658,7 +702,8 @@ __wt_conn_dhandle_close_all( bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) { - if (strcmp(dhandle->name, name) != 0) + if (strcmp(dhandle->name, name) != 0 || + F_ISSET(dhandle, WT_DHANDLE_DEAD)) continue; session->dhandle = dhandle; @@ -730,7 +775,7 @@ __conn_dhandle_remove(WT_SESSION_IMPL *session, int final) * Close/discard a single data handle. */ int -__wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final) +__wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final, int force) { WT_DATA_HANDLE *dhandle; WT_DECL_RET; @@ -738,8 +783,9 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final) dhandle = session->dhandle; - if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) { - tret = __wt_conn_btree_sync_and_close(session, final, 0); + if (F_ISSET(dhandle, WT_DHANDLE_OPEN) || + (final && F_ISSET(dhandle, WT_DHANDLE_DEAD))) { + tret = __wt_conn_btree_sync_and_close(session, final, force); if (final && tret != 0) { __wt_err(session, tret, "Final close of %s failed", dhandle->name); @@ -803,7 +849,7 @@ restart: continue; WT_WITH_DHANDLE(session, dhandle, - WT_TRET(__wt_conn_dhandle_discard_single(session, 1))); + WT_TRET(__wt_conn_dhandle_discard_single(session, 1, 0))); goto restart; } @@ -819,7 +865,7 @@ restart: /* Close the metadata file handle. */ while ((dhandle = SLIST_FIRST(&conn->dhlh)) != NULL) WT_WITH_DHANDLE(session, dhandle, - WT_TRET(__wt_conn_dhandle_discard_single(session, 1))); + WT_TRET(__wt_conn_dhandle_discard_single(session, 1, 0))); return (ret); } diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 46f40a6fefd..fc29e0b2e15 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -9,104 +9,87 @@ #include "wt_internal.h" /* - * __sweep_remove_handles -- - * Remove closed dhandles from the connection list. + * __sweep_mark -- + * Mark idle handles with a time of death, and note if we see dead + * handles. */ static int -__sweep_remove_handles(WT_SESSION_IMPL *session) +__sweep_mark(WT_SESSION_IMPL *session, int *dead_handlesp) { WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle, *dhandle_next; - WT_DECL_RET; + WT_DATA_HANDLE *dhandle; + time_t now; conn = S2C(session); - dhandle = SLIST_FIRST(&conn->dhlh); + *dead_handlesp = 0; - for (; dhandle != NULL; dhandle = dhandle_next) { - dhandle_next = SLIST_NEXT(dhandle, l); + /* Don't discard handles that have been open recently. */ + WT_RET(__wt_seconds(session, &now)); + + WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps); + SLIST_FOREACH(dhandle, &conn->dhlh, l) { if (WT_IS_METADATA(dhandle)) continue; - if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) + if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) { + ++*dead_handlesp; continue; - - /* Make sure we get exclusive access. */ - if ((ret = - __wt_try_writelock(session, dhandle->rwlock)) == EBUSY) + } + if (dhandle->session_inuse != 0 || + now <= dhandle->timeofdeath + conn->sweep_idle_time) continue; - WT_RET(ret); - - /* - * If there are no longer any references to the handle in any - * sessions, attempt to discard it. - */ - if (F_ISSET(dhandle, WT_DHANDLE_OPEN) || - dhandle->session_inuse != 0 || dhandle->session_ref != 0) { - WT_RET(__wt_writeunlock(session, dhandle->rwlock)); + if (dhandle->timeofdeath == 0) { + dhandle->timeofdeath = now; + WT_STAT_FAST_CONN_INCR(session, dh_conn_tod); continue; } - WT_WITH_DHANDLE(session, dhandle, - ret = __wt_conn_dhandle_discard_single(session, 0)); - - /* If the handle was not successfully discarded, unlock it. */ - if (ret != 0) - WT_TRET(__wt_writeunlock(session, dhandle->rwlock)); - WT_RET_BUSY_OK(ret); - WT_STAT_FAST_CONN_INCR(session, dh_conn_ref); + /* We now have a candidate to close. */ + ++*dead_handlesp; } - return (ret == EBUSY ? 0 : ret); + return (0); } /* - * __sweep -- - * Close unused dhandles on the connection dhandle list. + * __sweep_expire -- + * Mark trees dead if they are clean and haven't been accessed recently, + * until we have reached the configured minimum number of handles. */ static int -__sweep(WT_SESSION_IMPL *session) +__sweep_expire(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; time_t now; - int closed_handles; conn = S2C(session); - closed_handles = 0; /* Don't discard handles that have been open recently. */ WT_RET(__wt_seconds(session, &now)); WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps); SLIST_FOREACH(dhandle, &conn->dhlh, l) { + /* + * Ignore open files once the open file count reaches the + * minimum number of handles. + */ + if (conn->open_file_count < conn->sweep_handles_min) + break; + if (WT_IS_METADATA(dhandle)) continue; - - if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) && - dhandle->session_inuse == 0 && dhandle->session_ref == 0) { - ++closed_handles; + if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || + F_ISSET(dhandle, WT_DHANDLE_DEAD)) continue; - } if (dhandle->session_inuse != 0 || now <= dhandle->timeofdeath + conn->sweep_idle_time) continue; - if (dhandle->timeofdeath == 0) { - dhandle->timeofdeath = now; - WT_STAT_FAST_CONN_INCR(session, dh_conn_tod); - continue; - } - - /* - * Ignore in-use files once the open file count reaches the - * minimum number of handles. - */ - if (conn->open_file_count < conn->sweep_handles_min) - continue; /* * We have a candidate for closing; if it's open, acquire an - * exclusive lock on the handle and close it. + * exclusive lock on the handle and mark it dead. * * The close would require I/O if an update cannot be written * (updates in a no-longer-referenced file might not yet be @@ -115,8 +98,10 @@ __sweep(WT_SESSION_IMPL *session) * next time, after the transaction state has progressed. * * We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want - * opens to block on us rather than returning an EBUSY error to - * the application. + * opens to block on us and then retry rather than returning an + * EBUSY error to the application. This is done holding the + * handle list lock so that connection-level handle searches + * never need to retry. */ if ((ret = __wt_try_writelock(session, dhandle->rwlock)) == EBUSY) @@ -129,31 +114,103 @@ __sweep(WT_SESSION_IMPL *session) !__wt_txn_visible_all(session, btree->rec_max_txn)) goto unlock; - /* If the handle is open, try to close it. */ - if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) { - WT_WITH_DHANDLE(session, dhandle, ret = - __wt_conn_btree_sync_and_close(session, 0, 0)); + /* + * Mark the handle as dead and close the underlying file + * handle. Closing the handle decrements the open file count, + * meaning the close loop won't overrun the configured minimum. + */ + WT_WITH_DHANDLE(session, dhandle, ret = + __wt_conn_btree_sync_and_close(session, 0, 1)); - /* We closed the btree handle, bump the statistic. */ - if (ret == 0) - WT_STAT_FAST_CONN_INCR( - session, dh_conn_handles); - } +unlock: WT_TRET(__wt_writeunlock(session, dhandle->rwlock)); + WT_RET_BUSY_OK(ret); + } - if (dhandle->session_inuse == 0 && dhandle->session_ref == 0) - ++closed_handles; + return (0); +} +/* + * __sweep_flush -- + * Flush pages from dead trees. + */ +static int +__sweep_flush(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + + conn = S2C(session); + + WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps); + SLIST_FOREACH(dhandle, &conn->dhlh, l) { + if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || + !F_ISSET(dhandle, WT_DHANDLE_DEAD)) + continue; + + /* If the handle is marked "dead", flush it from cache. */ + WT_WITH_DHANDLE(session, dhandle, ret = + __wt_conn_btree_sync_and_close(session, 0, 0)); + + /* We closed the btree handle, bump the statistic. */ + if (ret == 0) + WT_STAT_FAST_CONN_INCR(session, dh_conn_handles); -unlock: WT_TRET(__wt_writeunlock(session, dhandle->rwlock)); WT_RET_BUSY_OK(ret); } - if (closed_handles) { - WT_WITH_DHANDLE_LOCK(session, - ret = __sweep_remove_handles(session)); + return (0); +} + +/* + * __sweep_remove_handles -- + * Remove closed dhandles from the connection list. + */ +static int +__sweep_remove_handles(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle, *dhandle_next; + WT_DECL_RET; + + conn = S2C(session); + dhandle = SLIST_FIRST(&conn->dhlh); + + for (; dhandle != NULL; dhandle = dhandle_next) { + dhandle_next = SLIST_NEXT(dhandle, l); + if (WT_IS_METADATA(dhandle)) + continue; + if (F_ISSET(dhandle, WT_DHANDLE_OPEN) || + dhandle->session_inuse != 0 || + dhandle->session_ref != 0) + continue; + + /* Make sure we get exclusive access. */ + if ((ret = + __wt_try_writelock(session, dhandle->rwlock)) == EBUSY) + continue; WT_RET(ret); + + /* + * If there are no longer any references to the handle in any + * sessions, attempt to discard it. + */ + if (F_ISSET(dhandle, WT_DHANDLE_OPEN) || + dhandle->session_inuse != 0 || dhandle->session_ref != 0) { + WT_RET(__wt_writeunlock(session, dhandle->rwlock)); + continue; + } + + WT_WITH_DHANDLE(session, dhandle, + ret = __wt_conn_dhandle_discard_single(session, 0, 1)); + + /* If the handle was not successfully discarded, unlock it. */ + if (ret != 0) + WT_TRET(__wt_writeunlock(session, dhandle->rwlock)); + WT_RET_BUSY_OK(ret); + WT_STAT_FAST_CONN_INCR(session, dh_conn_ref); } - return (0); + return (ret == EBUSY ? 0 : ret); } /* @@ -166,12 +223,13 @@ __sweep_server(void *arg) WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; + int dead_handles; session = arg; conn = S2C(session); /* - * Sweep for dead handles. + * Sweep for dead and excess handles. */ while (F_ISSET(conn, WT_CONN_SERVER_RUN) && F_ISSET(conn, WT_CONN_SERVER_SWEEP)) { @@ -179,8 +237,28 @@ __sweep_server(void *arg) WT_ERR(__wt_cond_wait(session, conn->sweep_cond, (uint64_t)conn->sweep_interval * WT_MILLION)); - /* Sweep the handles. */ - WT_ERR(__sweep(session)); + /* + * Mark handles with a time of death, and report whether any + * handles are marked dead. + */ + WT_ERR(__sweep_mark(session, &dead_handles)); + + if (dead_handles == 0 && + conn->open_file_count < conn->sweep_handles_min) + continue; + + /* Close handles if we have reached the configured limit */ + if (conn->open_file_count >= conn->sweep_handles_min) { + WT_WITH_DHANDLE_LOCK(session, + ret = __sweep_expire(session)); + WT_ERR(ret); + } + + WT_ERR(__sweep_flush(session)); + + WT_WITH_DHANDLE_LOCK(session, + ret = __sweep_remove_handles(session)); + WT_ERR(ret); } if (0) { diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 5057946adc9..2b721359e03 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1426,6 +1426,19 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server) if (page->read_gen != WT_READGEN_OLDEST) page->read_gen = __wt_cache_read_gen_set(session); + /* + * If we are evicting in a dead tree, don't write dirty pages. + * + * Force pages clean to keep statistics correct and to let the + * page-discard function assert that no dirty pages are ever + * discarded. + */ + if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD) && + __wt_page_is_modified(page)) { + page->modify->write_gen = 0; + __wt_cache_dirty_decr(session, page); + } + WT_WITH_BTREE(session, btree, ret = __wt_evict_page(session, ref)); (void)WT_ATOMIC_SUB4(btree->evict_busy, 1); diff --git a/src/include/dhandle.h b/src/include/dhandle.h index 300e8e735b9..034db30a0a2 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -65,11 +65,12 @@ struct __wt_data_handle { WT_DSRC_STATS stats; /* Data-source statistics */ /* Flags values over 0xff are reserved for WT_BTREE_* */ -#define WT_DHANDLE_DISCARD 0x01 /* Discard on release */ -#define WT_DHANDLE_DISCARD_CLOSE 0x02 /* Close on release */ -#define WT_DHANDLE_EXCLUSIVE 0x04 /* Need exclusive access */ -#define WT_DHANDLE_HAVE_REF 0x08 /* Already have ref */ -#define WT_DHANDLE_LOCK_ONLY 0x10 /* Handle only used as a lock */ -#define WT_DHANDLE_OPEN 0x20 /* Handle is open */ +#define WT_DHANDLE_DEAD 0x01 /* Dead, awaiting discard */ +#define WT_DHANDLE_DISCARD 0x02 /* Discard on release */ +#define WT_DHANDLE_DISCARD_FORCE 0x04 /* Force discard on release */ +#define WT_DHANDLE_EXCLUSIVE 0x08 /* Need exclusive access */ +#define WT_DHANDLE_HAVE_REF 0x10 /* Already have ref */ +#define WT_DHANDLE_LOCK_ONLY 0x20 /* Handle only used as a lock */ +#define WT_DHANDLE_OPEN 0x40 /* Handle is open */ uint32_t flags; }; diff --git a/src/include/extern.h b/src/include/extern.h index 082ffb07a45..3c5e6f9412d 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -232,7 +232,7 @@ extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, int apply_checkpoints extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]); extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]); extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *name, int force); -extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final); +extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final, int force); extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session); extern int __wt_connection_init(WT_CONNECTION_IMPL *conn); extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn); @@ -576,7 +576,7 @@ extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, WT_SESSION_IMPL **sessionp); extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, int *skip); extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config); -extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags); +extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, int *deadp); extern int __wt_session_release_btree(WT_SESSION_IMPL *session); extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags); extern void __wt_session_close_cache(WT_SESSION_IMPL *session); @@ -674,7 +674,7 @@ extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force); +extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final); extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session); extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session); extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 15c3d34d5b0..15ba07664cc 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -457,7 +457,7 @@ __lsm_discard_handle( WT_RET(__wt_session_get_btree(session, uri, checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); - F_SET(session->dhandle, WT_DHANDLE_DISCARD); + F_SET(session->dhandle, WT_DHANDLE_DISCARD_FORCE); return (__wt_session_release_btree(session)); } diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index fb568361f74..eae0079effd 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -71,7 +71,7 @@ __wt_metadata_cursor( * We use the metadata a lot, so we have a handle cached; lock it and * increment the in-use counter once the cursor is open. */ - WT_ERR(__wt_session_lock_dhandle(session, 0)); + WT_ERR(__wt_session_lock_dhandle(session, 0, NULL)); WT_ERR(__wt_curfile_create(session, NULL, cfg, 0, 0, cursorp)); __wt_cursor_dhandle_incr_use(session); diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index a6c9fb867e7..a9baaf1f40d 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -47,7 +47,7 @@ __session_add_dhandle( * the schema lock. */ int -__wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags) +__wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, int *deadp) { enum { NOLOCK, READLOCK, WRITELOCK } locked; WT_BTREE *btree; @@ -57,6 +57,8 @@ __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags) btree = S2BT(session); dhandle = session->dhandle; locked = NOLOCK; + if (deadp != NULL) + *deadp = 0; /* * Special operation flags will cause the handle to be reopened. @@ -95,7 +97,10 @@ __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags) * required, we're done. Otherwise, check that the handle is open and * that no special flags are required. */ - if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) || + if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) { + WT_ASSERT(session, deadp != NULL); + *deadp = 1; + } else if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) || (F_ISSET(dhandle, WT_DHANDLE_OPEN) && special_flags == 0)) return (0); @@ -135,46 +140,25 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) dhandle = session->dhandle; locked = F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) ? WRITELOCK : READLOCK; - if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_CLOSE)) { - /* - * If configured to discard on last close, trade any read lock - * for an exclusive lock. If the exchange succeeds, setup for - * discard. It is expected acquiring an exclusive lock will fail - * sometimes since the handle may still be in use: in that case - * we're done. - */ - if (locked == READLOCK) { - locked = NOLOCK; - WT_ERR(__wt_readunlock(session, dhandle->rwlock)); - ret = __wt_try_writelock(session, dhandle->rwlock); - if (ret != 0) { - if (ret == EBUSY) - ret = 0; - goto err; - } - locked = WRITELOCK; - F_CLR(dhandle, WT_DHANDLE_DISCARD_CLOSE); - F_SET(dhandle, - WT_DHANDLE_DISCARD | WT_DHANDLE_EXCLUSIVE); - } - } - /* * If we had special flags set, close the handle so that future access * can get a handle without special flags. */ - if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) || + if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_FORCE)) { + WT_WITH_DHANDLE_LOCK(session, + ret = __wt_conn_btree_sync_and_close(session, 0, 1)); + F_CLR(dhandle, WT_DHANDLE_DISCARD_FORCE); + } else if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) || F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) { WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)); + ret = __wt_conn_btree_sync_and_close(session, 0, 0); F_CLR(dhandle, WT_DHANDLE_DISCARD); - - WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0)); } if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)) F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE); -err: switch (locked) { + switch (locked) { case NOLOCK: break; case READLOCK: @@ -312,7 +296,8 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session) dhandle = dhandle_cache->dhandle; if (dhandle != session->dhandle && dhandle->session_inuse == 0 && - now - dhandle->timeofdeath > conn->sweep_idle_time) { + (F_ISSET(dhandle, WT_DHANDLE_DEAD) || + now - dhandle->timeofdeath > conn->sweep_idle_time)) { WT_STAT_FAST_CONN_INCR(session, dh_session_handles); __session_discard_btree(session, dhandle_cache); } @@ -348,6 +333,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, WT_DATA_HANDLE_CACHE *dhandle_cache; WT_DECL_RET; uint64_t bucket; + int is_dead; WT_ASSERT(session, !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES)); WT_ASSERT(session, !LF_ISSET(WT_DHANDLE_HAVE_REF)); @@ -381,7 +367,8 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, if (dhandle != NULL) { /* Try to lock the handle; if this succeeds, we're done. */ - if ((ret = __wt_session_lock_dhandle(session, flags)) == 0) + if ((ret = + __wt_session_lock_dhandle(session, flags, &is_dead)) == 0) goto done; /* Propagate errors we don't expect. */ @@ -389,17 +376,23 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, return (ret); /* - * Don't try harder to get the btree handle if our caller - * hasn't allowed us to take the schema lock - they do so on - * purpose and will handle error returns. + * Don't try harder to get the handle if we're only checking + * for locks or our caller hasn't allowed us to take the schema + * lock - they do so on purpose and will handle error returns. */ - if (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) && + if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) || + (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) && F_ISSET(session, - WT_SESSION_HANDLE_LIST_LOCKED | WT_SESSION_TABLE_LOCKED)) + WT_SESSION_HANDLE_LIST_LOCKED | WT_SESSION_TABLE_LOCKED))) return (ret); - /* We found the data handle, don't try to get it again. */ - LF_SET(WT_DHANDLE_HAVE_REF); + /* If we found the handle and it isn't dead, reopen it. */ + if (is_dead) { + __session_discard_btree(session, dhandle_cache); + dhandle_cache = NULL; + session->dhandle = dhandle = NULL; + } else + LF_SET(WT_DHANDLE_HAVE_REF); } /* @@ -419,11 +412,11 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, WT_RET(__session_add_dhandle(session, NULL)); WT_ASSERT(session, LF_ISSET(WT_DHANDLE_LOCK_ONLY) || - F_ISSET(session->dhandle, WT_DHANDLE_OPEN)); + (F_ISSET(session->dhandle, WT_DHANDLE_OPEN) && + !F_ISSET(session->dhandle, WT_DHANDLE_DEAD))); done: WT_ASSERT(session, LF_ISSET(WT_DHANDLE_EXCLUSIVE) == F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE)); - F_SET(session->dhandle, LF_ISSET(WT_DHANDLE_DISCARD_CLOSE)); return (0); } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 7c1532390f9..e362541f8e6 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1090,7 +1090,7 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) * Checkpoint a single file as part of closing the handle. */ int -__wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force) +__wt_checkpoint_close(WT_SESSION_IMPL *session, int final) { WT_BTREE *btree; WT_DECL_RET; @@ -1099,8 +1099,8 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force) btree = S2BT(session); bulk = F_ISSET(btree, WT_BTREE_BULK) ? 1 : 0; - /* Handle forced discard (when dropping a file). */ - if (force) + /* If the handle is already dead, force the discard. */ + if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD_FORCE)); /* |