diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-06-04 16:19:27 +1000 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-06-04 16:19:27 +1000 |
commit | 4f05e1298898d51febaeaa3ab9e20de4bb2babf8 (patch) | |
tree | 497c03035fd73a68a68efb9040a1aee00645519a | |
parent | 00088cccba90f66cee108794a0ff3ce7a6507c01 (diff) | |
download | mongo-4f05e1298898d51febaeaa3ab9e20de4bb2babf8.tar.gz |
Fix races in table-level schema operations.
closes #191
-rw-r--r-- | src/conn/conn_btree.c | 8 | ||||
-rw-r--r-- | src/conn/conn_handle.c | 4 | ||||
-rw-r--r-- | src/cursor/cur_file.c | 2 | ||||
-rw-r--r-- | src/include/api.h | 1 | ||||
-rw-r--r-- | src/meta/meta_table.c | 41 | ||||
-rw-r--r-- | src/schema/schema_create.c | 16 | ||||
-rw-r--r-- | src/schema/schema_drop.c | 20 | ||||
-rw-r--r-- | src/schema/schema_open.c | 5 | ||||
-rw-r--r-- | src/schema/schema_truncate.c | 5 | ||||
-rw-r--r-- | src/schema/schema_worker.c | 1 | ||||
-rw-r--r-- | src/session/session_api.c | 24 | ||||
-rw-r--r-- | src/session/session_btree.c | 9 | ||||
-rw-r--r-- | test/fops/t.c | 25 |
13 files changed, 90 insertions, 71 deletions
diff --git a/src/conn/conn_btree.c b/src/conn/conn_btree.c index e5251289201..90331c2adca 100644 --- a/src/conn/conn_btree.c +++ b/src/conn/conn_btree.c @@ -335,8 +335,10 @@ __wt_conn_btree_close(WT_SESSION_IMPL *session, int locked) */ __wt_spin_lock(session, &conn->spinlock); inuse = --btree->refcnt > 0; - if (!inuse && !locked) + if (!inuse && !locked) { __wt_writelock(session, btree->rwlock); + F_SET(btree, WT_BTREE_EXCLUSIVE); + } __wt_spin_unlock(session, &conn->spinlock); if (!inuse) { @@ -350,8 +352,10 @@ __wt_conn_btree_close(WT_SESSION_IMPL *session, int locked) if (F_ISSET(btree, WT_BTREE_OPEN)) WT_TRET(__wt_conn_btree_sync_and_close(session)); - if (!locked) + if (!locked) { + F_CLR(btree, WT_BTREE_EXCLUSIVE); __wt_rwunlock(session, btree->rwlock); + } } return (ret); diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 2dc93131681..b8523261dfe 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -31,6 +31,9 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) /* File handle spinlock. */ __wt_spin_init(session, &conn->fh_lock); + /* Schema operation spinlock. */ + __wt_spin_init(session, &conn->schema_lock); + /* Serialized function call spinlock. */ __wt_spin_init(session, &conn->serial_lock); @@ -76,6 +79,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->fh_lock); __wt_spin_destroy(session, &conn->serial_lock); + __wt_spin_destroy(session, &conn->schema_lock); __wt_spin_destroy(session, &conn->spinlock); if (conn->ckpt_rwlock != NULL) diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index bc8b1955c0f..cc70d0c3ef6 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -290,7 +290,7 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_RET(__wt_session_get_btree(session, uri, cfg, bulk ? WT_BTREE_EXCLUSIVE : 0)); else - return (EINVAL); + WT_RET_MSG(session, EINVAL, "Unexpected object type"); return (__wt_curfile_create(session, owner, cfg, cursorp)); } diff --git a/src/include/api.h b/src/include/api.h index e340443d7aa..3e2483961ce 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -185,6 +185,7 @@ struct __wt_connection_impl { WT_SESSION_IMPL dummy_session; WT_SPINLOCK fh_lock; /* File handle queue spinlock */ + WT_SPINLOCK schema_lock; /* Schema operation spinlock */ WT_SPINLOCK serial_lock; /* Serial function call spinlock */ WT_SPINLOCK spinlock; /* General purpose spinlock */ diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index b189eaf1758..62260bfac48 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -46,8 +46,7 @@ __wt_metadata_open(WT_SESSION_IMPL *session) WT_ASSERT(session, session->metafile != NULL); /* The metafile doesn't need to stay locked -- release it. */ - WT_RET(__wt_session_release_btree(session)); - return (0); + return (__wt_session_release_btree(session)); } /* @@ -58,12 +57,20 @@ int __wt_metadata_cursor( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp) { + WT_BTREE *saved_btree; + WT_DECL_RET; const char *cfg[] = API_CONF_DEFAULTS(session, open_cursor, config); - WT_RET(__wt_metadata_open(session)); + saved_btree = session->btree; + WT_ERR(__wt_metadata_open(session)); + session->btree = session->metafile; - WT_RET(__wt_session_lock_btree(session, 0)); - return (__wt_curfile_create(session, NULL, cfg, cursorp)); + WT_ERR(__wt_session_lock_btree(session, 0)); + ret = __wt_curfile_create(session, NULL, cfg, cursorp); + + /* Restore the caller's btree. */ +err: session->btree = saved_btree; + return (ret); } /* @@ -74,7 +81,6 @@ int __wt_metadata_insert( WT_SESSION_IMPL *session, const char *key, const char *value) { - WT_BTREE *btree; WT_CURSOR *cursor; WT_DECL_RET; @@ -82,8 +88,6 @@ __wt_metadata_insert( WT_RET_MSG(session, EINVAL, "%s: insert not supported on the turtle file", key); - /* Save the caller's btree: the metadata cursor will overwrite it. */ - btree = session->btree; WT_ERR(__wt_metadata_cursor(session, NULL, &cursor)); cursor->set_key(cursor, key); cursor->set_value(cursor, value); @@ -92,9 +96,7 @@ __wt_metadata_insert( ret = __wt_meta_track_insert(session, key); WT_TRET(cursor->close(cursor)); - /* Restore the caller's btree. */ -err: session->btree = btree; - return (ret); +err: return (ret); } /* @@ -105,7 +107,6 @@ int __wt_metadata_update( WT_SESSION_IMPL *session, const char *key, const char *value) { - WT_BTREE *btree; WT_CURSOR *cursor; WT_DECL_RET; @@ -115,16 +116,12 @@ __wt_metadata_update( if (WT_META_TRACKING(session)) WT_RET(__wt_meta_track_update(session, key)); - /* Save the caller's btree: the metadata cursor will overwrite it. */ - btree = session->btree; WT_RET(__wt_metadata_cursor(session, "overwrite", &cursor)); cursor->set_key(cursor, key); cursor->set_value(cursor, value); WT_TRET(cursor->insert(cursor)); WT_TRET(cursor->close(cursor)); - /* Restore the caller's btree. */ - session->btree = btree; return (ret); } @@ -135,7 +132,6 @@ __wt_metadata_update( int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key) { - WT_BTREE *btree; WT_CURSOR *cursor; WT_DECL_RET; @@ -143,8 +139,6 @@ __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key) WT_RET_MSG(session, EINVAL, "%s: remove not supported on the turtle file", key); - /* Save the caller's btree: the metadata cursor will overwrite it. */ - btree = session->btree; WT_RET(__wt_metadata_cursor(session, NULL, &cursor)); cursor->set_key(cursor, key); WT_TRET(cursor->search(cursor)); @@ -155,8 +149,6 @@ __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key) } WT_TRET(cursor->close(cursor)); - /* Restore the caller's btree. */ - session->btree = btree; return (ret); } @@ -169,7 +161,6 @@ int __wt_metadata_read( WT_SESSION_IMPL *session, const char *key, const char **valuep) { - WT_BTREE *btree; WT_CURSOR *cursor; WT_DECL_RET; const char *value; @@ -177,16 +168,12 @@ __wt_metadata_read( if (__metadata_turtle(key)) return (__wt_meta_turtle_read(session, key, valuep)); - /* Save the caller's btree: the metadata cursor will overwrite it. */ - btree = session->btree; WT_RET(__wt_metadata_cursor(session, NULL, &cursor)); cursor->set_key(cursor, key); WT_ERR(cursor->search(cursor)); WT_ERR(cursor->get_value(cursor, &value)); WT_ERR(__wt_strdup(session, value, valuep)); -err: WT_TRET(cursor->close(cursor)); - /* Restore the caller's btree. */ - session->btree = btree; +err: WT_TRET(cursor->close(cursor)); return (ret); } diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index 093fcb19326..77e50172671 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -170,6 +170,7 @@ __create_colgroup(WT_SESSION_IMPL *session, WT_ERR(__wt_buf_fmt(session, &uribuf, "file:%s", filename)); fileuri = uribuf.data; + WT_ERR(__create_file(session, fileuri, exclusive, fileconf)); if ((ret = __wt_metadata_insert(session, name, cgconf)) != 0) { /* * If the entry already exists in the metadata, we're done. @@ -179,10 +180,9 @@ __create_colgroup(WT_SESSION_IMPL *session, ret = exclusive ? EEXIST : 0; goto err; } - WT_ERR(__create_file(session, fileuri, exclusive, fileconf)); WT_ERR(__wt_schema_open_colgroups(session, table)); -err: __wt_free(session, cgconf); +err: __wt_free(session, cgconf); __wt_free(session, fileconf); __wt_free(session, oldconf); __wt_buf_free(session, &fmt); @@ -279,6 +279,7 @@ __create_index(WT_SESSION_IMPL *session, WT_ERR(__wt_buf_fmt(session, &uribuf, "file:%s", filename)); fileuri = uribuf.data; + WT_ERR(__create_file(session, fileuri, exclusive, fileconf)); if ((ret = __wt_metadata_insert(session, name, idxconf)) != 0) { /* * If the entry already exists in the metadata, we're done. @@ -288,7 +289,6 @@ __create_index(WT_SESSION_IMPL *session, ret = exclusive ? EEXIST : 0; goto err; } - WT_ERR(__create_file(session, fileuri, exclusive, fileconf)); err: __wt_free(session, fileconf); __wt_free(session, idxconf); @@ -340,7 +340,15 @@ __create_table(WT_SESSION_IMPL *session, return (ret); WT_RET(__wt_config_collapse(session, cfg, &tableconf)); - WT_ERR(__wt_metadata_insert(session, name, tableconf)); + if ((ret = __wt_metadata_insert(session, name, tableconf)) != 0) { + /* + * If the entry already exists in the metadata, we're done. + * This is an error for exclusive creates but okay otherwise. + */ + if (ret == WT_DUPLICATE_KEY) + ret = exclusive ? EEXIST : 0; + goto err; + } /* Attempt to open the table now to catch any errors. */ WT_ERR(__wt_schema_get_table( diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c index fbcc65e1979..8d1386e0369 100644 --- a/src/schema/schema_drop.c +++ b/src/schema/schema_drop.c @@ -24,8 +24,8 @@ __drop_file( return (EINVAL); if (session->btree == NULL && - (ret = __wt_session_get_btree( - session, uri, cfg, WT_BTREE_EXCLUSIVE | WT_BTREE_LOCK_ONLY)) != 0) { + (ret = __wt_session_get_btree(session, uri, cfg, + WT_BTREE_EXCLUSIVE | WT_BTREE_LOCK_ONLY)) != 0) { if (ret == WT_NOTFOUND || ret == ENOENT) ret = 0; return (ret); @@ -115,8 +115,8 @@ __drop_colgroup( * Try to get the btree handle. It will be unlocked by * __wt_conn_btree_close_all. */ - if ((ret = __wt_schema_get_btree( - session, uri, strlen(uri), cfg, WT_BTREE_EXCLUSIVE)) != 0) { + if ((ret = __wt_schema_get_btree(session, uri, strlen(uri), cfg, + WT_BTREE_EXCLUSIVE | WT_BTREE_LOCK_ONLY)) != 0) { if (ret == WT_NOTFOUND || ret == ENOENT) ret = 0; return (ret); @@ -158,8 +158,8 @@ __drop_index( * Try to get the btree handle. It will be unlocked by * __wt_conn_btree_close_all. */ - if ((ret = __wt_schema_get_btree( - session, uri, strlen(uri), cfg, WT_BTREE_EXCLUSIVE)) != 0) { + if ((ret = __wt_schema_get_btree(session, uri, strlen(uri), cfg, + WT_BTREE_EXCLUSIVE | WT_BTREE_LOCK_ONLY)) != 0) { if (ret == WT_NOTFOUND || ret == ENOENT) ret = 0; return (ret); @@ -199,22 +199,22 @@ __drop_table( for (i = 0; i < WT_COLGROUPS(table); i++) { if (table->cg_name[i] == NULL) continue; - WT_TRET(__drop_colgroup( + WT_ERR(__drop_colgroup( session, table->cg_name[i], force, cfg)); } /* Drop the indices. */ - WT_TRET(__wt_schema_open_index(session, table, NULL, 0)); + WT_ERR(__wt_schema_open_index(session, table, NULL, 0)); for (i = 0; i < table->nindices; i++) { if (table->idx_name[i] == NULL) continue; WT_TRET(__drop_index(session, table->idx_name[i], force, cfg)); } - WT_TRET(__wt_schema_remove_table(session, table)); + WT_ERR(__wt_schema_remove_table(session, table)); /* Remove the metadata entry (ignore missing items). */ - WT_TRET(__wt_metadata_remove(session, uri)); + WT_ERR(__wt_metadata_remove(session, uri)); err: if (force && ret == WT_NOTFOUND) ret = 0; diff --git a/src/schema/schema_open.c b/src/schema/schema_open.c index 1081e889f35..f71551bebe9 100644 --- a/src/schema/schema_open.c +++ b/src/schema/schema_open.c @@ -242,8 +242,10 @@ __open_index(WT_SESSION_IMPL *session, WT_TABLE *table, err: __wt_buf_free(session, &cols); __wt_buf_free(session, &uribuf); - if (session->btree != NULL) + if (btree != NULL) { + session->btree = btree; WT_TRET(__wt_session_release_btree(session)); + } return (ret); } @@ -401,7 +403,6 @@ __wt_schema_open_table(WT_SESSION_IMPL *session, WT_ERR(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cg_name)); WT_ERR(__wt_schema_open_colgroups(session, table)); - *tablep = table; if (0) { diff --git a/src/schema/schema_truncate.c b/src/schema/schema_truncate.c index 6e53c7ab2ae..bd8a64736b4 100644 --- a/src/schema/schema_truncate.c +++ b/src/schema/schema_truncate.c @@ -45,11 +45,6 @@ __truncate_table(WT_SESSION_IMPL *session, const char *name) WT_RET(__wt_schema_get_table(session, name, strlen(name), &table)); WT_RET(__wt_scr_alloc(session, 0, &namebuf)); - /* - * We are closing the column groups, they must be reopened for future - * accesses to the table. - */ - table->cg_complete = 0; /* Truncate the column groups. */ for (i = 0; i < WT_COLGROUPS(table); i++) { diff --git a/src/schema/schema_worker.c b/src/schema/schema_worker.c index 814d4e37965..0b4e83776a2 100644 --- a/src/schema/schema_worker.c +++ b/src/schema/schema_worker.c @@ -38,6 +38,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session, } else if (WT_PREFIX_SKIP(tablename, "table:")) { WT_RET(__wt_schema_get_table(session, tablename, strlen(tablename), &table)); + WT_ASSERT(session, session->btree == NULL); for (i = 0; i < WT_COLGROUPS(table); i++) { cgname = table->cg_name[i]; diff --git a/src/session/session_api.c b/src/session/session_api.c index 9e0f002058b..3d1c8eff80d 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -178,8 +178,9 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config) /* Disallow objects in the WiredTiger name space. */ WT_ERR(__wt_schema_name_check(session, uri)); - - WT_ERR(__wt_schema_create(session, uri, config)); + __wt_spin_lock(session, &S2C(session)->schema_lock); + ret = __wt_schema_create(session, uri, config); + __wt_spin_unlock(session, &S2C(session)->schema_lock); err: API_END_NOTFOUND_MAP(session, ret); } @@ -197,9 +198,12 @@ __session_rename(WT_SESSION *wt_session, session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, rename, config, cfg); + + WT_ERR(__wt_meta_track_on(session)); ret = __wt_schema_rename(session, uri, newname, cfg); -err: API_END_NOTFOUND_MAP(session, ret); +err: WT_TRET(__wt_meta_track_off(session, ret != 0)); + API_END_NOTFOUND_MAP(session, ret); } /* @@ -220,9 +224,11 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config) /* Dropping snapshots is a different code path. */ WT_ERR(__wt_config_gets(session, cfg, "snapshot", &cval)); + __wt_spin_lock(session, &S2C(session)->schema_lock); ret = (cval.len == 0) ? __wt_schema_drop(session, uri, cfg) : __wt_schema_worker( session, uri, cfg, __wt_snapshot_drop, WT_BTREE_SNAPSHOT_OP); + __wt_spin_unlock(session, &S2C(session)->schema_lock); err: /* Note: drop operations cannot be unrolled (yet?). */ WT_TRET(__wt_meta_track_off(session, 0)); @@ -241,8 +247,10 @@ __session_dumpfile(WT_SESSION *wt_session, const char *uri, const char *config) session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, dumpfile, config, cfg); + __wt_spin_lock(session, &S2C(session)->schema_lock); ret = __wt_schema_worker(session, uri, cfg, __wt_dumpfile, WT_BTREE_EXCLUSIVE | WT_BTREE_VERIFY); + __wt_spin_unlock(session, &S2C(session)->schema_lock); err: API_END_NOTFOUND_MAP(session, ret); } @@ -260,8 +268,10 @@ __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config) session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, salvage, config, cfg); + __wt_spin_lock(session, &S2C(session)->schema_lock); ret = __wt_schema_worker(session, uri, cfg, __wt_salvage, WT_BTREE_EXCLUSIVE | WT_BTREE_SALVAGE); + __wt_spin_unlock(session, &S2C(session)->schema_lock); err: API_END_NOTFOUND_MAP(session, ret); } @@ -281,10 +291,12 @@ __session_sync(WT_SESSION *wt_session, const char *uri, const char *config) SESSION_API_CALL(session, sync, config, cfg); WT_ERR(__wt_meta_track_on(session)); + __wt_spin_lock(session, &S2C(session)->schema_lock); ret = __wt_schema_worker( session, uri, cfg, __wt_snapshot, WT_BTREE_SNAPSHOT_OP); + __wt_spin_unlock(session, &S2C(session)->schema_lock); -err: WT_TRET(__wt_meta_track_off(session, ret != 0)); +err: WT_TRET(__wt_meta_track_off(session, ret != 0)); API_END_NOTFOUND_MAP(session, ret); } @@ -369,8 +381,10 @@ __session_upgrade(WT_SESSION *wt_session, const char *uri, const char *config) session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, upgrade, config, cfg); + __wt_spin_lock(session, &S2C(session)->schema_lock); ret = __wt_schema_worker(session, uri, cfg, __wt_upgrade, WT_BTREE_EXCLUSIVE | WT_BTREE_UPGRADE); + __wt_spin_unlock(session, &S2C(session)->schema_lock); err: API_END_NOTFOUND_MAP(session, ret); } @@ -388,8 +402,10 @@ __session_verify(WT_SESSION *wt_session, const char *uri, const char *config) session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, verify, config, cfg); + __wt_spin_lock(session, &S2C(session)->schema_lock); ret = __wt_schema_worker(session, uri, cfg, __wt_verify, WT_BTREE_EXCLUSIVE | WT_BTREE_VERIFY); + __wt_spin_unlock(session, &S2C(session)->schema_lock); err: API_END_NOTFOUND_MAP(session, ret); } diff --git a/src/session/session_btree.c b/src/session/session_btree.c index 02e6f08fe0b..00b01e25c05 100644 --- a/src/session/session_btree.c +++ b/src/session/session_btree.c @@ -116,6 +116,7 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) F_CLR(btree, WT_BTREE_EXCLUSIVE); __wt_rwunlock(session, btree->rwlock); + session->btree = NULL; return (ret); } @@ -171,8 +172,12 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, return (0); if ((ret = - __wt_session_lock_btree(session, flags)) != WT_NOTFOUND) + __wt_session_lock_btree(session, flags)) != WT_NOTFOUND) { + WT_ASSERT(session, ret != 0 || + LF_ISSET(WT_BTREE_EXCLUSIVE) == + F_ISSET(session->btree, WT_BTREE_EXCLUSIVE)); return (ret); + } ret = 0; } @@ -183,6 +188,8 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, WT_ASSERT(session, LF_ISSET(WT_BTREE_LOCK_ONLY) || F_ISSET(session->btree, WT_BTREE_OPEN)); + WT_ASSERT(session, LF_ISSET(WT_BTREE_EXCLUSIVE) == + F_ISSET(session->btree, WT_BTREE_EXCLUSIVE)); return (0); } diff --git a/test/fops/t.c b/test/fops/t.c index 468c982cad9..ad1d7811771 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -28,6 +28,7 @@ main(int argc, char *argv[]) u_int nthreads; int ch, cnt, runs; char *config_open; + const char **objp, *objs[] = { "file:__wt", "table:__wt", NULL }; if ((progname = strrchr(argv[0], '/')) == NULL) progname = argv[0]; @@ -76,21 +77,15 @@ main(int argc, char *argv[]) for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) { shutdown(); /* Clean up previous runs */ - uri = "file:__wt"; - printf(" %d: %u threads on %s\n", cnt, nthreads, uri); - wt_startup(config_open); - if (fop_start(nthreads)) - return (EXIT_FAILURE); - wt_shutdown(); - printf("\n"); - - uri = "table:__wt"; - printf(" %d: %u threads on %s\n", cnt, nthreads, uri); - wt_startup(config_open); - if (fop_start(nthreads)) - return (EXIT_FAILURE); - wt_shutdown(); - printf("\n"); + for (objp = objs; *objp != NULL; objp++) { + uri = *objp; + printf("%5d: %u threads on %s\n", cnt, nthreads, uri); + wt_startup(config_open); + if (fop_start(nthreads)) + return (EXIT_FAILURE); + wt_shutdown(); + printf("\n"); + } } return (0); } |