diff options
author    : Keith Bostic <keith@wiredtiger.com>  2014-07-22 12:00:26 -0400
committer : Keith Bostic <keith@wiredtiger.com>  2014-07-22 12:00:26 -0400
commit    : 51de923778091761c99a48f5e786c7cef39f73da (patch)
tree      : 2a01e75debedf303aa4dca7b0f77aa5ed5acfebe
parent    : 6ada92843ec70dea42f1cc461fda0836c4ab62f9 (diff)
download  : mongo-51de923778091761c99a48f5e786c7cef39f73da.tar.gz
Switch from using the checkpoint lock to serialize bulk-load close and
database checkpoints, to using a new WT_BTREE handle lock.
Reference #1114.
 src/btree/bt_handle.c   |  5
 src/conn/conn_dhandle.c | 32
 src/include/btree.h     |  7
 src/include/extern.h    |  3
 src/txn/txn_ckpt.c      | 70
 5 files changed, 79 insertions(+), 38 deletions(-)
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 5c1d7c1a2d5..a4587aeb40b 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -158,6 +158,7 @@ __wt_btree_close(WT_SESSION_IMPL *session) /* Destroy locks. */ WT_TRET(__wt_rwlock_destroy(session, &btree->ovfl_lock)); + __wt_spin_destroy(session, &btree->bulk_ckpt_lock); /* Free allocated memory. */ __wt_free(session, btree->key_format); @@ -305,9 +306,11 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) (int)cval.len, cval.str); } - /* Overflow lock. */ + /* Allocate locks. */ WT_RET(__wt_rwlock_alloc( session, "btree overflow lock", &btree->ovfl_lock)); + WT_RET(__wt_spin_init( + session, &btree->bulk_ckpt_lock, "bulk/checkpoint lock")); __wt_stat_init_dsrc_stats(&btree->dhandle->stats); diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index d95ab86b887..6d6ef8879c2 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -170,7 +170,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session) WT_BTREE *btree; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - int ckpt_lock; + int is_bulk; dhandle = session->dhandle; btree = S2BT(session); @@ -179,26 +179,13 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session) return (0); /* - * Checkpoint to flush out the file's changes. This usually happens on - * data handle close (which means we're holding the handle lock, so - * this call serializes with any session checkpoint). Bulk-cursors are - * a special case: they do not hold the handle lock and they still must - * serialize with checkpoints. Acquire the lower-level checkpoint lock - * and hold it until the handle is closed and the bulk-cursor flag has - * been cleared. 
- * We hold the lock so long for two reasons: first, checkpoint uses - * underlying btree handle structures (for example, the meta-tracking - * checkpoint resolution uses the block-manager reference), and because - * checkpoint writes "fake" checkpoint records for bulk-loaded files, - * and a real checkpoint, which we're creating here, can't be followed - * by more fake checkpoints. In summary, don't let a checkpoint happen - * unless all of the bulk cursor's information has been cleared. + * Bulk-load has special checkpoint locking requirements (see the lock + * function for details). Lock/unlock around the checkpoint and clear + * of the bulk-load flag. */ - ckpt_lock = 0; - if (F_ISSET(btree, WT_BTREE_BULK)) { - ckpt_lock = 1; - __wt_spin_lock(session, &S2C(session)->checkpoint_lock); - } + is_bulk = F_ISSET(btree, WT_BTREE_BULK) ? 1 : 0; + if (is_bulk) + __wt_checkpoint_bulk_lock(session, btree, 1); /* * The close can fail if an update cannot be written, return the EBUSY @@ -215,9 +202,8 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session) F_CLR(dhandle, WT_DHANDLE_OPEN); F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); -err: - if (ckpt_lock) - __wt_spin_unlock(session, &S2C(session)->checkpoint_lock); +err: if (is_bulk) + __wt_checkpoint_bulk_lock(session, btree, 0); return (ret); } diff --git a/src/include/btree.h b/src/include/btree.h index e78f81b3e1a..8181db1686c 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -60,8 +60,6 @@ struct __wt_btree { WT_DATA_HANDLE *dhandle; - WT_CKPT *ckpt; /* Checkpoint information */ - enum { BTREE_COL_FIX=1, /* Fixed-length column store */ BTREE_COL_VAR=2, /* Variable-length column store */ BTREE_ROW=3 /* Row-store */ @@ -105,7 +103,6 @@ struct __wt_btree { WT_REF root; /* Root page reference */ int modified; /* If the tree ever modified */ - int bulk_load_ok; /* Bulk-load is a possibility */ WT_BM *bm; /* Block manager reference */ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ @@ -118,8 +115,12 @@ struct 
__wt_btree { u_int evict_walk_skips; /* Number of walks skipped */ volatile uint32_t evict_busy; /* Count of threads in eviction */ + WT_CKPT *ckpt; /* Checkpoint information */ int checkpointing; /* Checkpoint in progress */ + int bulk_load_ok; /* Bulk-load is a possibility */ + WT_SPINLOCK bulk_ckpt_lock; /* Lock checkpoint and bulk close */ + /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ #define WT_BTREE_BULK 0x00100 /* Bulk-load handle */ #define WT_BTREE_NO_EVICTION 0x00200 /* Disable eviction */ diff --git a/src/include/extern.h b/src/include/extern.h index 715659eaeff..42e5b11b99c 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1566,6 +1566,9 @@ extern void __wt_txn_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_global_init(WT_CONNECTION_IMPL *conn, const char *cfg[]); extern void __wt_txn_global_destroy(WT_CONNECTION_IMPL *conn); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); +extern void __wt_checkpoint_bulk_lock( WT_SESSION_IMPL *session, + WT_BTREE *btree, + int getlock); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint_write_leaves(WT_SESSION_IMPL *session, const char *cfg[]); diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index c066254926c..a8cc34df485 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -434,6 +434,32 @@ __drop_to(WT_CKPT *ckptbase, const char *name, size_t len) } /* + * __wt_checkpoint_bulk_lock -- + * Lock/unlock when checkpointing a bulk-load file. + */ +void +__wt_checkpoint_bulk_lock( + WT_SESSION_IMPL *session, WT_BTREE *btree, int getlock) +{ + /* + * This function exists as a place for this comment: checkpoint does a + * read-modify-write cycle of the file's metadata, which means there's + * a potential race with other threads calling the checkpoint function. + * There are two paths into the checkpoint worker function, database + * checkpoints and handle close. 
Both exclusively hold the handle's + * lock, but bulk-load is a special case. Because bulk-load is likely + * a long-lived operation, database checkpoint is allowed to proceed + * without a bulk-load handle's lock. To avoid a database checkpoint + * racing with bulk-load handle close, we use a separate per-file lock + * acquired/released around the checkpoint. + */ + if (getlock) + __wt_spin_lock(session, &btree->bulk_ckpt_lock); + else + __wt_spin_unlock(session, &btree->bulk_ckpt_lock); +} + +/* * __checkpoint_worker -- * Checkpoint a tree. */ @@ -451,19 +477,19 @@ __checkpoint_worker( WT_DECL_RET; WT_LSN ckptlsn; const char *name; - int deleted, force, hot_backup_locked, track_ckpt; + int bulk_ckpt_locked, deleted, force, hot_backup_locked, track_ckpt; char *name_alloc; btree = S2BT(session); bm = btree->bm; conn = S2C(session); + dhandle = session->dhandle; + ckpt = ckptbase = NULL; INIT_LSN(&ckptlsn); - dhandle = session->dhandle; - name_alloc = NULL; - hot_backup_locked = 0; - name_alloc = NULL; + bulk_ckpt_locked = hot_backup_locked = 0; track_ckpt = 1; + name_alloc = NULL; /* * If closing a file that's never been modified, discard its blocks. @@ -475,14 +501,32 @@ __checkpoint_worker( return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD)); /* - * Get the list of checkpoints for this file. If there's no reference - * to the file in the metadata (the file is dead), then discard it from + * Bulk-load has special checkpoint locking requirements (see the lock + * function for details). Lock/unlock around the read-modify-write + * cycle of the file's metadata. We only lock here in the case of a + * database checkpoint, the bulk-load handle close path has already + * acquired the lock. + */ + if (is_checkpoint && F_ISSET(btree, WT_BTREE_BULK)) { + bulk_ckpt_locked = 1; + /* + * The bulk-load flag could be cleared after we test and before + * we acquire the lock. 
We don't care, all this lock does is + * single-thread this function, the race just means a bulk-load + * handle had the lock and closed while we waited. + */ + __wt_checkpoint_bulk_lock(session, btree, 1); + } + + /* + * If the file has no metadata (the file is dead), then discard it from * the cache without bothering to write any dirty pages. */ if ((ret = __wt_meta_ckptlist_get( session, dhandle->name, &ckptbase)) == WT_NOTFOUND) { WT_ASSERT(session, session->dhandle->session_ref == 0); - return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD)); + ret = __wt_cache_op(session, NULL, WT_SYNC_DISCARD); + goto done; } WT_ERR(ret); @@ -567,7 +611,7 @@ __checkpoint_worker( } if (!btree->modified && !force) { if (!is_checkpoint) - goto skip; + goto done; deleted = 0; WT_CKPT_FOREACH(ckptbase, ckpt) @@ -585,7 +629,7 @@ __checkpoint_worker( (strcmp(name, (ckpt - 1)->name) == 0 || (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT)))) - goto skip; + goto done; } /* Add a new checkpoint entry at the end of the list. */ @@ -786,9 +830,13 @@ fake: /* Update the object's metadata. */ WT_ERR(__wt_txn_checkpoint_log( session, 0, WT_TXN_LOG_CKPT_STOP, NULL)); +done: err: if (hot_backup_locked) __wt_spin_unlock(session, &conn->hot_backup_lock); -skip: __wt_meta_ckptlist_free(session, ckptbase); + if (bulk_ckpt_locked) + __wt_checkpoint_bulk_lock(session, btree, 0); + + __wt_meta_ckptlist_free(session, ckptbase); __wt_free(session, name_alloc); return (ret); } |