diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2015-08-13 15:58:40 +1000
---|---|---
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-08-13 16:24:45 +1000
commit | e15b3263bef39506df9b029b22981b438524dd8c (patch)
tree | c6e5aa5e2d9b927368322990ed4ac9f709fea069
parent | df9836b1b2c8d21af51131710465b84a64d4b14b (diff)
download | mongo-e15b3263bef39506df9b029b22981b438524dd8c.tar.gz
WT-2038 Use TAILQ for all lists.
Otherwise remove is O(N), which may not show up until running real workloads (while holding locks).
(cherry picked from commit 79e6050da59f2dc1523ef72b5ff0b652a938d4e1)
-rw-r--r-- | bench/wtperf/config.c | 1
-rw-r--r-- | bench/wtperf/wtperf_truncate.c | 216
-rwxr-xr-x | dist/s_style | 5
-rw-r--r-- | src/async/async_api.c | 10
-rw-r--r-- | src/async/async_worker.c | 10
-rw-r--r-- | src/block/block_open.c | 2
-rw-r--r-- | src/conn/conn_dhandle.c | 16
-rw-r--r-- | src/conn/conn_handle.c | 12
-rw-r--r-- | src/conn/conn_open.c | 4
-rw-r--r-- | src/conn/conn_sweep.c | 15
-rw-r--r-- | src/evict/evict_lru.c | 8
-rw-r--r-- | src/include/async.h | 8
-rw-r--r-- | src/include/block.h | 4
-rw-r--r-- | src/include/connection.h | 40
-rw-r--r-- | src/include/dhandle.h | 4
-rw-r--r-- | src/include/os.h | 4
-rw-r--r-- | src/include/schema.h | 4
-rw-r--r-- | src/include/session.h | 12
-rw-r--r-- | src/os_posix/os_open.c | 4
-rw-r--r-- | src/os_posix/os_remove.c | 2
-rw-r--r-- | src/os_win/os_open.c | 4
-rw-r--r-- | src/os_win/os_remove.c | 2
-rw-r--r-- | src/schema/schema_list.c | 12
-rw-r--r-- | src/session/session_api.c | 6
-rw-r--r-- | src/session/session_dhandle.c | 18
-rw-r--r-- | src/txn/txn_nsnap.c | 369
26 files changed, 689 insertions, 103 deletions
diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c index e27b4861bed..ae677f9d91b 100644 --- a/bench/wtperf/config.c +++ b/bench/wtperf/config.c @@ -95,6 +95,7 @@ config_assign(CONFIG *dest, const CONFIG *src) *pstr = newstr; } } + return (0); } diff --git a/bench/wtperf/wtperf_truncate.c b/bench/wtperf/wtperf_truncate.c new file mode 100644 index 00000000000..581d1987947 --- /dev/null +++ b/bench/wtperf/wtperf_truncate.c @@ -0,0 +1,216 @@ +/*- + * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "wtperf.h" + +static inline uint64_t +decode_key(char *key_buf) +{ + return (strtoull(key_buf, NULL, 10)); +} + +int +setup_truncate(CONFIG *cfg, CONFIG_THREAD *thread, WT_SESSION *session) { + + TRUNCATE_CONFIG *trunc_cfg; + TRUNCATE_QUEUE_ENTRY *truncate_item; + WORKLOAD *workload; + WT_CURSOR *cursor; + char *key, *truncate_key; + int ret; + uint64_t end_point, final_stone_gap, i, start_point; + + end_point = final_stone_gap = start_point = 0; + trunc_cfg = &thread->trunc_cfg; + workload = thread->workload; + + /* We are limited to only one table when running truncate. */ + if ((ret = session->open_cursor( + session, cfg->uris[0], NULL, NULL, &cursor)) != 0) + goto err; + + /* How many entries between each stone. */ + trunc_cfg->stone_gap = + (workload->truncate_count * workload->truncate_pct) / 100; + /* How many stones we need. */ + trunc_cfg->needed_stones = + workload->truncate_count / trunc_cfg->stone_gap; + + final_stone_gap = trunc_cfg->stone_gap; + + /* Reset this value for use again. */ + trunc_cfg->stone_gap = 0; + + /* + * Here we check if there is data in the collection. If there is + * data available, then we need to setup some initial truncation + * stones. + */ + if ((ret = cursor->next(cursor)) != 0 || + (ret = cursor->get_key(cursor, &key)) != 0) { + lprintf(cfg, ret, 0, "truncate setup start: failed"); + goto err; + } + + start_point = decode_key(key); + if ((cursor->reset(cursor)) != 0 || (ret = cursor->prev(cursor)) != 0 || + (ret = cursor->get_key(cursor, &key)) != 0) { + lprintf(cfg, ret, 0, "truncate setup end: failed"); + goto err; + } + end_point = decode_key(key); + + /* Assign stones if there are enough documents. */ + if (start_point + trunc_cfg->needed_stones > end_point) + trunc_cfg->stone_gap = 0; + else + trunc_cfg->stone_gap = + (end_point - start_point) / trunc_cfg->needed_stones; + + /* If we have enough data allocate some stones. 
*/ + if (trunc_cfg->stone_gap != 0) { + trunc_cfg->expected_total = (end_point - start_point); + for (i = 1; i <= trunc_cfg->needed_stones; i++) { + truncate_key = calloc(cfg->key_sz, 1); + if (truncate_key == NULL) { + ret = enomem(cfg); + goto err; + } + truncate_item = calloc(sizeof(TRUNCATE_QUEUE_ENTRY), 1); + if (truncate_item == NULL) { + free(truncate_key); + ret = enomem(cfg); + goto err; + } + generate_key( + cfg, truncate_key, trunc_cfg->stone_gap * i); + truncate_item->key = truncate_key; + truncate_item->diff = + (trunc_cfg->stone_gap * i) - trunc_cfg->last_key; + TAILQ_INSERT_TAIL( &cfg->stone_head, truncate_item, q); + trunc_cfg->last_key = trunc_cfg->stone_gap * i; + trunc_cfg->num_stones++; + } + } + trunc_cfg->stone_gap = final_stone_gap; + +err: if ((ret = cursor->close(cursor)) != 0) { + lprintf(cfg, ret, 0, "truncate setup: cursor close failed"); + } + return (ret); +} + +int +run_truncate(CONFIG *cfg, CONFIG_THREAD *thread, + WT_CURSOR *cursor, WT_SESSION *session, int *truncatedp) { + + TRUNCATE_CONFIG *trunc_cfg; + TRUNCATE_QUEUE_ENTRY *truncate_item; + char *truncate_key; + int ret, t_ret; + + ret = 0; + trunc_cfg = &thread->trunc_cfg; + + *truncatedp = 0; + /* Update the total inserts */ + trunc_cfg->total_inserts = sum_insert_ops(cfg); + trunc_cfg->expected_total += + (trunc_cfg->total_inserts - trunc_cfg->last_total_inserts); + trunc_cfg->last_total_inserts = trunc_cfg->total_inserts; + + /* We are done if there isn't enough data to trigger a new milestone. 
*/ + if (trunc_cfg->expected_total <= trunc_cfg->needed_stones) + return (0); + + while (trunc_cfg->num_stones < trunc_cfg->needed_stones) { + trunc_cfg->last_key += trunc_cfg->stone_gap; + truncate_key = calloc(cfg->key_sz, 1); + if (truncate_key == NULL) { + lprintf(cfg, ENOMEM, 0, + "truncate: couldn't allocate key array"); + return (ENOMEM); + } + truncate_item = calloc(sizeof(TRUNCATE_QUEUE_ENTRY), 1); + if (truncate_item == NULL) { + free(truncate_key); + lprintf(cfg, ENOMEM, 0, + "truncate: couldn't allocate item"); + return (ENOMEM); + } + generate_key(cfg, truncate_key, trunc_cfg->last_key); + truncate_item->key = truncate_key; + truncate_item->diff = trunc_cfg->stone_gap; + TAILQ_INSERT_TAIL(&cfg->stone_head, truncate_item, q); + trunc_cfg->num_stones++; + } + + /* We are done if there isn't enough data to trigger a truncate. */ + if (trunc_cfg->num_stones == 0 || + trunc_cfg->expected_total <= thread->workload->truncate_count) + return (0); + + truncate_item = TAILQ_FIRST(&cfg->stone_head); + trunc_cfg->num_stones--; + TAILQ_REMOVE(&cfg->stone_head, truncate_item, q); + cursor->set_key(cursor,truncate_item->key); + if ((ret = cursor->search(cursor)) != 0) { + lprintf(cfg, ret, 0, "Truncate search: failed"); + goto err; + } + + if ((ret = session->truncate(session, NULL, NULL, cursor, NULL)) != 0) { + lprintf(cfg, ret, 0, "Truncate: failed"); + goto err; + } + + + *truncatedp = 1; + trunc_cfg->expected_total -= truncate_item->diff; + +err: free(truncate_item->key); + free(truncate_item); + t_ret = cursor->reset(cursor); + if (t_ret != 0) + lprintf(cfg, t_ret, 0, "Cursor reset failed"); + if (ret == 0 && t_ret != 0) + ret = t_ret; + return (ret); +} + +void +cleanup_truncate_config(CONFIG *cfg) { + TRUNCATE_QUEUE_ENTRY *truncate_item; + + while (!TAILQ_EMPTY(&cfg->stone_head)) { + truncate_item = TAILQ_FIRST(&cfg->stone_head); + TAILQ_REMOVE(&cfg->stone_head, truncate_item, q); + free(truncate_item->key); + free(truncate_item); + } +} diff --git 
a/dist/s_style b/dist/s_style index ccee9404441..b0a642a4e26 100755 --- a/dist/s_style +++ b/dist/s_style @@ -64,6 +64,11 @@ for f in \ cat $t fi + if ! expr "$f" : 'src/include/queue\.h' > /dev/null && + egrep 'STAIL_|SLIST_|\bLIST_' $f ; then + echo "$f: use TAILQ for all lists" + fi + if ! expr "$f" : 'src/os_posix/.*' > /dev/null && ! expr "$f" : 'src/os_win/.*' > /dev/null && ! expr "$f" : 'src/include/extern.h' > /dev/null && diff --git a/src/async/async_api.c b/src/async/async_api.c index 44e492cb0e5..9874d7aab00 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -43,7 +43,7 @@ __async_get_format(WT_CONNECTION_IMPL *conn, const char *uri, * is a possibility a duplicate entry might be inserted, but * that is not harmful. */ - STAILQ_FOREACH(af, &async->formatqh, q) { + TAILQ_FOREACH(af, &async->formatqh, q) { if (af->uri_hash == uri_hash && af->cfg_hash == cfg_hash) goto setup; } @@ -71,7 +71,7 @@ __async_get_format(WT_CONNECTION_IMPL *conn, const char *uri, WT_ERR(c->close(c)); c = NULL; - STAILQ_INSERT_HEAD(&async->formatqh, af, q); + TAILQ_INSERT_HEAD(&async->formatqh, af, q); __wt_spin_unlock(session, &async->ops_lock); WT_ERR(wt_session->close(wt_session, NULL)); @@ -237,7 +237,7 @@ __async_start(WT_SESSION_IMPL *session) */ WT_RET(__wt_calloc_one(session, &conn->async)); async = conn->async; - STAILQ_INIT(&async->formatqh); + TAILQ_INIT(&async->formatqh); WT_RET(__wt_spin_init(session, &async->ops_lock, "ops")); WT_RET(__wt_cond_alloc(session, "async flush", 0, &async->flush_cond)); WT_RET(__wt_async_op_init(session)); @@ -461,9 +461,9 @@ __wt_async_destroy(WT_SESSION_IMPL *session) } /* Free format resources */ - af = STAILQ_FIRST(&async->formatqh); + af = TAILQ_FIRST(&async->formatqh); while (af != NULL) { - afnext = STAILQ_NEXT(af, q); + afnext = TAILQ_NEXT(af, q); __wt_free(session, af->uri); __wt_free(session, af->config); __wt_free(session, af->key_format); diff --git a/src/async/async_worker.c b/src/async/async_worker.c index 
4f372d05d19..473e7103832 100644 --- a/src/async/async_worker.c +++ b/src/async/async_worker.c @@ -135,7 +135,7 @@ __async_worker_cursor(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op, if (op->optype == WT_AOP_COMPACT) return (0); WT_ASSERT(session, op->format != NULL); - STAILQ_FOREACH(ac, &worker->cursorqh, q) { + TAILQ_FOREACH(ac, &worker->cursorqh, q) { if (op->format->cfg_hash == ac->cfg_hash && op->format->uri_hash == ac->uri_hash) { /* @@ -156,7 +156,7 @@ __async_worker_cursor(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op, ac->cfg_hash = op->format->cfg_hash; ac->uri_hash = op->format->uri_hash; ac->c = c; - STAILQ_INSERT_HEAD(&worker->cursorqh, ac, q); + TAILQ_INSERT_HEAD(&worker->cursorqh, ac, q); worker->num_cursors++; *cursorp = c; return (0); @@ -297,7 +297,7 @@ __wt_async_worker(void *arg) async = conn->async; worker.num_cursors = 0; - STAILQ_INIT(&worker.cursorqh); + TAILQ_INIT(&worker.cursorqh); while (F_ISSET(conn, WT_CONN_SERVER_ASYNC) && F_ISSET(session, WT_SESSION_SERVER_ASYNC)) { WT_ERR(__async_op_dequeue(conn, session, &op)); @@ -346,9 +346,9 @@ err: WT_PANIC_MSG(session, ret, "async worker error"); * Worker thread cleanup, close our cached cursors and free all the * WT_ASYNC_CURSOR structures. 
*/ - ac = STAILQ_FIRST(&worker.cursorqh); + ac = TAILQ_FIRST(&worker.cursorqh); while (ac != NULL) { - acnext = STAILQ_NEXT(ac, q); + acnext = TAILQ_NEXT(ac, q); WT_TRET(ac->c->close(ac->c)); __wt_free(session, ac); ac = acnext; diff --git a/src/block/block_open.c b/src/block/block_open.c index 4728066b487..e1b7894aee5 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -186,7 +186,7 @@ __wt_block_open(WT_SESSION_IMPL *session, hash = __wt_hash_city64(filename, strlen(filename)); bucket = hash % WT_HASH_ARRAY_SIZE; __wt_spin_lock(session, &conn->block_lock); - SLIST_FOREACH(block, &conn->blockhash[bucket], hashl) { + TAILQ_FOREACH(block, &conn->blockhash[bucket], hashq) { if (strcmp(filename, block->name) == 0) { ++block->ref; *blockp = block; diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 12aae3e4875..54cc1308537 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -146,7 +146,7 @@ __wt_conn_dhandle_find(WT_SESSION_IMPL *session, /* Increment the reference count if we already have the btree open. 
*/ bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; if (ckpt == NULL) { - SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) { + TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) { if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) continue; if (dhandle->checkpoint == NULL && @@ -156,7 +156,7 @@ __wt_conn_dhandle_find(WT_SESSION_IMPL *session, } } } else - SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) { + TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) { if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) continue; if (dhandle->checkpoint != NULL && @@ -569,7 +569,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, if (uri != NULL) { bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE; - SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) + TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && !F_ISSET(dhandle, WT_DHANDLE_DEAD) && strcmp(uri, dhandle->name) == 0 && @@ -577,7 +577,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, WT_RET(__conn_btree_apply_internal( session, dhandle, func, cfg)); } else { - SLIST_FOREACH(dhandle, &conn->dhlh, l) + TAILQ_FOREACH(dhandle, &conn->dhqh, q) if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && !F_ISSET(dhandle, WT_DHANDLE_DEAD) && (apply_checkpoints || @@ -654,7 +654,7 @@ __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, hash = __wt_hash_city64(uri, strlen(uri)); bucket = hash % WT_HASH_ARRAY_SIZE; - SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) + TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && !F_ISSET(dhandle, WT_DHANDLE_DEAD) && (hash == dhandle->name_hash && @@ -703,7 +703,7 @@ __wt_conn_dhandle_close_all( WT_ASSERT(session, session->dhandle == NULL); bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; - SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) { + TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) { if (strcmp(dhandle->name, name) != 0 || F_ISSET(dhandle, WT_DHANDLE_DEAD)) 
continue; @@ -847,7 +847,7 @@ __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) * the list, so we do it the hard way. */ restart: - SLIST_FOREACH(dhandle, &conn->dhlh, l) { + TAILQ_FOREACH(dhandle, &conn->dhqh, q) { if (WT_IS_METADATA(dhandle)) continue; @@ -866,7 +866,7 @@ restart: F_SET(session, WT_SESSION_NO_DATA_HANDLES); /* Close the metadata file handle. */ - while ((dhandle = SLIST_FIRST(&conn->dhlh)) != NULL) + while ((dhandle = TAILQ_FIRST(&conn->dhqh)) != NULL) WT_WITH_DHANDLE(session, dhandle, WT_TRET(__wt_conn_dhandle_discard_single(session, 1, 0))); diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index b212409be80..a9b52e8a6fe 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -21,14 +21,14 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) session = conn->default_session; for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) { - SLIST_INIT(&conn->dhhash[i]); /* Data handle hash lists */ - SLIST_INIT(&conn->fhhash[i]); /* File handle hash lists */ + TAILQ_INIT(&conn->dhhash[i]); /* Data handle hash lists */ + TAILQ_INIT(&conn->fhhash[i]); /* File handle hash lists */ } - SLIST_INIT(&conn->dhlh); /* Data handle list */ + TAILQ_INIT(&conn->dhqh); /* Data handle list */ TAILQ_INIT(&conn->dlhqh); /* Library list */ TAILQ_INIT(&conn->dsrcqh); /* Data source list */ - SLIST_INIT(&conn->fhlh); /* File list */ + TAILQ_INIT(&conn->fhqh); /* File list */ TAILQ_INIT(&conn->collqh); /* Collator list */ TAILQ_INIT(&conn->compqh); /* Compressor list */ TAILQ_INIT(&conn->extractorqh); /* Extractor list */ @@ -88,8 +88,8 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) */ WT_RET(__wt_spin_init(session, &conn->block_lock, "block manager")); for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) - SLIST_INIT(&conn->blockhash[i]);/* Block handle hash lists */ - SLIST_INIT(&conn->blocklh); /* Block manager list */ + TAILQ_INIT(&conn->blockhash[i]);/* Block handle hash lists */ + TAILQ_INIT(&conn->blockqh); /* Block manager list */ return (0); } diff --git 
a/src/conn/conn_open.c b/src/conn/conn_open.c index ca8335fbdb9..a140a06628d 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -144,14 +144,14 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) * Complain if files weren't closed, ignoring the lock file, we'll * close it in a minute. */ - SLIST_FOREACH(fh, &conn->fhlh, l) { + TAILQ_FOREACH(fh, &conn->fhqh, q) { if (fh == conn->lock_fh) continue; __wt_errx(session, "Connection has open file handles: %s", fh->name); WT_TRET(__wt_close(session, &fh)); - fh = SLIST_FIRST(&conn->fhlh); + fh = TAILQ_FIRST(&conn->fhqh); } /* Disconnect from shared cache - must be before cache destroy. */ diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index b23c8151db1..4b925094c7f 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -21,8 +21,7 @@ __sweep_mark(WT_SESSION_IMPL *session, time_t now) conn = S2C(session); - WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps); - SLIST_FOREACH(dhandle, &conn->dhlh, l) { + TAILQ_FOREACH(dhandle, &conn->dhqh, q) { if (WT_IS_METADATA(dhandle)) continue; @@ -117,8 +116,7 @@ __sweep_expire(WT_SESSION_IMPL *session, time_t now) conn = S2C(session); - WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps); - SLIST_FOREACH(dhandle, &conn->dhlh, l) { + TAILQ_FOREACH(dhandle, &conn->dhqh, q) { /* * Ignore open files once the btree file count is below the * minimum number of handles. 
@@ -158,8 +156,7 @@ __sweep_discard_trees( *dead_handlesp = 0; - WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps); - SLIST_FOREACH(dhandle, &conn->dhlh, l) { + TAILQ_FOREACH(dhandle, &conn->dhqh, q) { if (!F_ISSET(dhandle, WT_DHANDLE_OPEN | WT_DHANDLE_EXCLUSIVE) && (dhandle->timeofdiscard == 0 || now <= dhandle->timeofdiscard + conn->sweep_idle_time)) @@ -232,10 +229,10 @@ __sweep_remove_handles(WT_SESSION_IMPL *session, time_t now) conn = S2C(session); - for (dhandle = SLIST_FIRST(&conn->dhlh); + for (dhandle = TAILQ_FIRST(&conn->dhqh); dhandle != NULL; dhandle = dhandle_next) { - dhandle_next = SLIST_NEXT(dhandle, l); + dhandle_next = TAILQ_NEXT(dhandle, q); if (WT_IS_METADATA(dhandle)) continue; if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN) || @@ -283,6 +280,8 @@ __sweep_server(void *arg) (uint64_t)conn->sweep_interval * WT_MILLION)); WT_ERR(__wt_seconds(session, &now)); + WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps); + /* * Mark handles with a time of death, and report whether any * handles are marked dead. 
If sweep_idle_time is 0, handles diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index e9d58419c39..4d6e482b1d9 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -638,7 +638,7 @@ __evict_clear_all_walks(WT_SESSION_IMPL *session) conn = S2C(session); - SLIST_FOREACH(dhandle, &conn->dhlh, l) + TAILQ_FOREACH(dhandle, &conn->dhqh, q) if (WT_PREFIX_MATCH(dhandle->name, "file:")) WT_WITH_DHANDLE(session, dhandle, WT_TRET(__evict_clear_walk(session))); @@ -976,14 +976,14 @@ retry: while (slot < max_entries && ret == 0) { if ((dhandle = cache->evict_file_next) != NULL) cache->evict_file_next = NULL; else - dhandle = SLIST_FIRST(&conn->dhlh); + dhandle = TAILQ_FIRST(&conn->dhqh); } else { if (incr) { WT_ASSERT(session, dhandle->session_inuse > 0); (void)WT_ATOMIC_SUB4(dhandle->session_inuse, 1); incr = 0; } - dhandle = SLIST_NEXT(dhandle, l); + dhandle = TAILQ_NEXT(dhandle, q); } /* If we reach the end of the list, we're done. */ @@ -1542,7 +1542,7 @@ __wt_cache_dump(WT_SESSION_IMPL *session) conn = S2C(session); total_bytes = 0; - SLIST_FOREACH(dhandle, &conn->dhlh, l) { + TAILQ_FOREACH(dhandle, &conn->dhqh, q) { if (!WT_PREFIX_MATCH(dhandle->name, "file:") || !F_ISSET(dhandle, WT_DHANDLE_OPEN)) continue; diff --git a/src/include/async.h b/src/include/async.h index 88ecad6eb2c..c8d9fa5aa91 100644 --- a/src/include/async.h +++ b/src/include/async.h @@ -31,7 +31,7 @@ typedef enum { * The URI/config/format cache. 
*/ struct __wt_async_format { - STAILQ_ENTRY(__wt_async_format) q; + TAILQ_ENTRY(__wt_async_format) q; const char *config; uint64_t cfg_hash; /* Config hash */ const char *uri; @@ -88,7 +88,7 @@ struct __wt_async { uint64_t alloc_tail; /* Next slot to dequeue */ uint64_t tail_slot; /* Worker slot consumed */ - STAILQ_HEAD(__wt_async_format_qh, __wt_async_format) formatqh; + TAILQ_HEAD(__wt_async_format_qh, __wt_async_format) formatqh; int cur_queue; /* Currently enqueued */ int max_queue; /* Maximum enqueued */ WT_ASYNC_FLUSH_STATE flush_state; /* Queue flush state */ @@ -112,7 +112,7 @@ struct __wt_async { * has a cache of async cursors to reuse for operations. */ struct __wt_async_cursor { - STAILQ_ENTRY(__wt_async_cursor) q; /* Worker cache */ + TAILQ_ENTRY(__wt_async_cursor) q; /* Worker cache */ uint64_t cfg_hash; /* Config hash */ uint64_t uri_hash; /* URI hash */ WT_CURSOR *c; /* WT cursor */ @@ -124,6 +124,6 @@ struct __wt_async_cursor { */ struct __wt_async_worker_state { uint32_t id; - STAILQ_HEAD(__wt_cursor_qh, __wt_async_cursor) cursorqh; + TAILQ_HEAD(__wt_cursor_qh, __wt_async_cursor) cursorqh; uint32_t num_cursors; }; diff --git a/src/include/block.h b/src/include/block.h index fb8987efdb4..8dd8e12a5c1 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -215,8 +215,8 @@ struct __wt_block { /* A list of block manager handles, sharing a file descriptor. */ uint32_t ref; /* References */ WT_FH *fh; /* Backing file handle */ - SLIST_ENTRY(__wt_block) l; /* Linked list of handles */ - SLIST_ENTRY(__wt_block) hashl; /* Hashed list of handles */ + TAILQ_ENTRY(__wt_block) q; /* Linked list of handles */ + TAILQ_ENTRY(__wt_block) hashq; /* Hashed list of handles */ /* Configuration information, set when the file is opened. 
*/ uint32_t allocfirst; /* Allocation is first-fit */ diff --git a/src/include/connection.h b/src/include/connection.h index f24459a4147..34eb704e5dd 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -16,7 +16,7 @@ struct __wt_process { WT_SPINLOCK spinlock; /* Per-process spinlock */ - /* Locked: connection queue */ + /* Locked: connection list */ TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh; WT_CACHE_POOL *cache_pool; }; @@ -84,14 +84,13 @@ struct __wt_named_extractor { * main queue and the hashed queue. */ #define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) do { \ - SLIST_INSERT_HEAD(&(conn)->dhlh, dhandle, l); \ - SLIST_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashl); \ + TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \ + TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \ } while (0) #define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \ - SLIST_REMOVE(&(conn)->dhlh, dhandle, __wt_data_handle, l); \ - SLIST_REMOVE(&(conn)->dhhash[bucket], \ - dhandle, __wt_data_handle, hashl); \ + TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \ + TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \ } while (0) /* @@ -99,14 +98,13 @@ struct __wt_named_extractor { * main queue and the hashed queue. */ #define WT_CONN_BLOCK_INSERT(conn, block, bucket) do { \ - SLIST_INSERT_HEAD(&(conn)->blocklh, block, l); \ - SLIST_INSERT_HEAD(&(conn)->blockhash[bucket], block, hashl); \ + TAILQ_INSERT_HEAD(&(conn)->blockqh, block, q); \ + TAILQ_INSERT_HEAD(&(conn)->blockhash[bucket], block, hashq); \ } while (0) #define WT_CONN_BLOCK_REMOVE(conn, block, bucket) do { \ - SLIST_REMOVE(&(conn)->blocklh, block, __wt_block, l); \ - SLIST_REMOVE( \ - &(conn)->blockhash[bucket], block, __wt_block, hashl); \ + TAILQ_REMOVE(&(conn)->blockqh, block, q); \ + TAILQ_REMOVE(&(conn)->blockhash[bucket], block, hashq); \ } while (0) /* @@ -114,13 +112,13 @@ struct __wt_named_extractor { * main queue and the hashed queue. 
*/ #define WT_CONN_FILE_INSERT(conn, fh, bucket) do { \ - SLIST_INSERT_HEAD(&(conn)->fhlh, fh, l); \ - SLIST_INSERT_HEAD(&(conn)->fhhash[bucket], fh, hashl); \ + TAILQ_INSERT_HEAD(&(conn)->fhqh, fh, q); \ + TAILQ_INSERT_HEAD(&(conn)->fhhash[bucket], fh, hashq); \ } while (0) #define WT_CONN_FILE_REMOVE(conn, fh, bucket) do { \ - SLIST_REMOVE(&(conn)->fhlh, fh, __wt_fh, l); \ - SLIST_REMOVE(&(conn)->fhhash[bucket], fh, __wt_fh, hashl); \ + TAILQ_REMOVE(&(conn)->fhqh, fh, q); \ + TAILQ_REMOVE(&(conn)->fhhash[bucket], fh, hashq); \ } while (0) /* @@ -185,20 +183,20 @@ struct __wt_connection_impl { */ /* Locked: data handle hash array */ #define WT_HASH_ARRAY_SIZE 512 - SLIST_HEAD(__wt_dhhash, __wt_data_handle) dhhash[WT_HASH_ARRAY_SIZE]; + TAILQ_HEAD(__wt_dhhash, __wt_data_handle) dhhash[WT_HASH_ARRAY_SIZE]; /* Locked: data handle list */ - SLIST_HEAD(__wt_dhandle_lh, __wt_data_handle) dhlh; + TAILQ_HEAD(__wt_dhandle_qh, __wt_data_handle) dhqh; /* Locked: LSM handle list. */ TAILQ_HEAD(__wt_lsm_qh, __wt_lsm_tree) lsmqh; /* Locked: file list */ - SLIST_HEAD(__wt_fhhash, __wt_fh) fhhash[WT_HASH_ARRAY_SIZE]; - SLIST_HEAD(__wt_fh_lh, __wt_fh) fhlh; + TAILQ_HEAD(__wt_fhhash, __wt_fh) fhhash[WT_HASH_ARRAY_SIZE]; + TAILQ_HEAD(__wt_fh_qh, __wt_fh) fhqh; /* Locked: library list */ TAILQ_HEAD(__wt_dlh_qh, __wt_dlh) dlhqh; WT_SPINLOCK block_lock; /* Locked: block manager list */ - SLIST_HEAD(__wt_blockhash, __wt_block) blockhash[WT_HASH_ARRAY_SIZE]; - SLIST_HEAD(__wt_block_lh, __wt_block) blocklh; + TAILQ_HEAD(__wt_blockhash, __wt_block) blockhash[WT_HASH_ARRAY_SIZE]; + TAILQ_HEAD(__wt_block_qh, __wt_block) blockqh; u_int open_btree_count; /* Locked: open writable btree count */ uint32_t next_file_id; /* Locked: file ID counter */ diff --git a/src/include/dhandle.h b/src/include/dhandle.h index b188c19566b..e7231616c12 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -34,8 +34,8 @@ */ struct __wt_data_handle { WT_RWLOCK *rwlock; /* Lock for shared/exclusive ops 
*/ - SLIST_ENTRY(__wt_data_handle) l; - SLIST_ENTRY(__wt_data_handle) hashl; + TAILQ_ENTRY(__wt_data_handle) q; + TAILQ_ENTRY(__wt_data_handle) hashq; /* * Sessions caching a connection's data handle will have a non-zero diff --git a/src/include/os.h b/src/include/os.h index edb59b0f521..518b124f547 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -77,8 +77,8 @@ typedef enum { struct __wt_fh { char *name; /* File name */ uint64_t name_hash; /* Hash of name */ - SLIST_ENTRY(__wt_fh) l; /* List of open handles */ - SLIST_ENTRY(__wt_fh) hashl; /* Hashed list of handles */ + TAILQ_ENTRY(__wt_fh) q; /* List of open handles */ + TAILQ_ENTRY(__wt_fh) hashq; /* Hashed list of handles */ u_int ref; /* Reference count */ diff --git a/src/include/schema.h b/src/include/schema.h index 5d524534b39..50d8e37800b 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -62,8 +62,8 @@ struct __wt_table { WT_INDEX **indices; size_t idx_alloc; - SLIST_ENTRY(__wt_table) l; - SLIST_ENTRY(__wt_table) hashl; + TAILQ_ENTRY(__wt_table) q; + TAILQ_ENTRY(__wt_table) hashq; int cg_complete, idx_complete, is_simple; u_int ncolgroups, nindices, nkey_columns; diff --git a/src/include/session.h b/src/include/session.h index 3efb8011e3b..afcb0c94f5b 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -14,8 +14,8 @@ struct __wt_data_handle_cache { WT_DATA_HANDLE *dhandle; - SLIST_ENTRY(__wt_data_handle_cache) l; - SLIST_ENTRY(__wt_data_handle_cache) hashl; + TAILQ_ENTRY(__wt_data_handle_cache) q; + TAILQ_ENTRY(__wt_data_handle_cache) hashq; }; /* @@ -67,7 +67,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { * across session close - so it is declared further down. 
*/ /* Session handle reference list */ - SLIST_HEAD(__dhandles, __wt_data_handle_cache) dhandles; + TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles; time_t last_sweep; /* Last sweep for dead handles */ WT_CURSOR *cursor; /* Current cursor */ @@ -91,7 +91,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { * table of lists. The hash table list is kept in allocated memory * that lives across session close - so it is declared further down. */ - SLIST_HEAD(__tables, __wt_table) tables; + TAILQ_HEAD(__tables, __wt_table) tables; WT_ITEM **scratch; /* Temporary memory for any function */ u_int scratch_alloc; /* Currently allocated */ @@ -151,9 +151,9 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { WT_RAND_STATE rnd; /* Random number generation state */ /* Hashed handle reference list array */ - SLIST_HEAD(__dhandles_hash, __wt_data_handle_cache) *dhhash; + TAILQ_HEAD(__dhandles_hash, __wt_data_handle_cache) *dhhash; /* Hashed table reference list array */ - SLIST_HEAD(__tables_hash, __wt_table) *tablehash; + TAILQ_HEAD(__tables_hash, __wt_table) *tablehash; /* * Splits can "free" memory that may still be in use, and we use a diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 5fa2e83415b..d8afbf88f87 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -53,7 +53,7 @@ __wt_open(WT_SESSION_IMPL *session, hash = __wt_hash_city64(name, strlen(name)); bucket = hash % WT_HASH_ARRAY_SIZE; __wt_spin_lock(session, &conn->fh_lock); - SLIST_FOREACH(tfh, &conn->fhhash[bucket], hashl) { + TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) { if (strcmp(name, tfh->name) == 0) { ++tfh->ref; *fhp = tfh; @@ -167,7 +167,7 @@ setupfh: */ matched = 0; __wt_spin_lock(session, &conn->fh_lock); - SLIST_FOREACH(tfh, &conn->fhhash[bucket], hashl) { + TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) { if (strcmp(name, tfh->name) == 0) { ++tfh->ref; *fhp = tfh; diff --git a/src/os_posix/os_remove.c 
b/src/os_posix/os_remove.c index 3fc692d8755..96bbba9bab2 100644 --- a/src/os_posix/os_remove.c +++ b/src/os_posix/os_remove.c @@ -29,7 +29,7 @@ __remove_file_check(WT_SESSION_IMPL *session, const char *name) * level should have closed it before removing. */ __wt_spin_lock(session, &conn->fh_lock); - SLIST_FOREACH(fh, &conn->fhhash[bucket], hashl) + TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) if (strcmp(name, fh->name) == 0) break; __wt_spin_unlock(session, &conn->fh_lock); diff --git a/src/os_win/os_open.c b/src/os_win/os_open.c index a77bef63b9d..1c6f5636501 100644 --- a/src/os_win/os_open.c +++ b/src/os_win/os_open.c @@ -39,7 +39,7 @@ __wt_open(WT_SESSION_IMPL *session, /* Increment the reference count if we already have the file open. */ matched = 0; __wt_spin_lock(session, &conn->fh_lock); - SLIST_FOREACH(tfh, &conn->fhhash[bucket], hashl) + TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) if (strcmp(name, tfh->name) == 0) { ++tfh->ref; *fhp = tfh; @@ -160,7 +160,7 @@ setupfh: */ matched = 0; __wt_spin_lock(session, &conn->fh_lock); - SLIST_FOREACH(tfh, &conn->fhhash[bucket], hashl) + TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) if (strcmp(name, tfh->name) == 0) { ++tfh->ref; *fhp = tfh; diff --git a/src/os_win/os_remove.c b/src/os_win/os_remove.c index 0c6396c775f..55b50030064 100644 --- a/src/os_win/os_remove.c +++ b/src/os_win/os_remove.c @@ -29,7 +29,7 @@ __remove_file_check(WT_SESSION_IMPL *session, const char *name) * level should have closed it before removing. 
*/ __wt_spin_lock(session, &conn->fh_lock); - SLIST_FOREACH(fh, &conn->fhhash[bucket], hashl) + TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) if (strcmp(name, fh->name) == 0) break; __wt_spin_unlock(session, &conn->fh_lock); diff --git a/src/schema/schema_list.c b/src/schema/schema_list.c index 57ea3b96647..5ab9f1c160e 100644 --- a/src/schema/schema_list.c +++ b/src/schema/schema_list.c @@ -29,8 +29,8 @@ __schema_add_table(WT_SESSION_IMPL *session, WT_RET(ret); bucket = table->name_hash % WT_HASH_ARRAY_SIZE; - SLIST_INSERT_HEAD(&session->tables, table, l); - SLIST_INSERT_HEAD(&session->tablehash[bucket], table, hashl); + TAILQ_INSERT_HEAD(&session->tables, table, q); + TAILQ_INSERT_HEAD(&session->tablehash[bucket], table, hashq); *tablep = table; return (0); @@ -51,7 +51,7 @@ __schema_find_table(WT_SESSION_IMPL *session, bucket = __wt_hash_city64(name, namelen) % WT_HASH_ARRAY_SIZE; restart: - SLIST_FOREACH(table, &session->tablehash[bucket], hashl) { + TAILQ_FOREACH(table, &session->tablehash[bucket], hashq) { tablename = table->name; (void)WT_PREFIX_SKIP(tablename, "table:"); if (WT_STRING_MATCH(tablename, name, namelen)) { @@ -218,8 +218,8 @@ __wt_schema_remove_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_ASSERT(session, table->refcnt <= 1); bucket = table->name_hash % WT_HASH_ARRAY_SIZE; - SLIST_REMOVE(&session->tables, table, __wt_table, l); - SLIST_REMOVE(&session->tablehash[bucket], table, __wt_table, hashl); + TAILQ_REMOVE(&session->tables, table, q); + TAILQ_REMOVE(&session->tablehash[bucket], table, hashq); return (__wt_schema_destroy_table(session, table)); } @@ -233,7 +233,7 @@ __wt_schema_close_tables(WT_SESSION_IMPL *session) WT_DECL_RET; WT_TABLE *table; - while ((table = SLIST_FIRST(&session->tables)) != NULL) + while ((table = TAILQ_FIRST(&session->tables)) != NULL) WT_TRET(__wt_schema_remove_table(session, table)); return (ret); } diff --git a/src/session/session_api.c b/src/session/session_api.c index b042e73f7d5..881be7428e5 100644 --- 
a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1070,7 +1070,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, event_handler == NULL ? session->event_handler : event_handler); TAILQ_INIT(&session_ret->cursors); - SLIST_INIT(&session_ret->dhandles); + TAILQ_INIT(&session_ret->dhandles); /* * If we don't have one, allocate the dhandle hash array. * Allocate the table hash array as well. @@ -1082,8 +1082,8 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_ERR(__wt_calloc(session_ret, WT_HASH_ARRAY_SIZE, sizeof(struct __tables_hash), &session_ret->tablehash)); for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) { - SLIST_INIT(&session_ret->dhhash[i]); - SLIST_INIT(&session_ret->tablehash[i]); + TAILQ_INIT(&session_ret->dhhash[i]); + TAILQ_INIT(&session_ret->tablehash[i]); } /* Initialize transaction support: default to read-committed. */ diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 427cff3f351..731e54884db 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -25,8 +25,8 @@ __session_add_dhandle( dhandle_cache->dhandle = session->dhandle; bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE; - SLIST_INSERT_HEAD(&session->dhandles, dhandle_cache, l); - SLIST_INSERT_HEAD(&session->dhhash[bucket], dhandle_cache, hashl); + TAILQ_INSERT_HEAD(&session->dhandles, dhandle_cache, q); + TAILQ_INSERT_HEAD(&session->dhhash[bucket], dhandle_cache, hashq); if (dhandle_cachep != NULL) *dhandle_cachep = dhandle_cache; @@ -242,10 +242,8 @@ __session_discard_btree( uint64_t bucket; bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE; - SLIST_REMOVE( - &session->dhandles, dhandle_cache, __wt_data_handle_cache, l); - SLIST_REMOVE(&session->dhhash[bucket], - dhandle_cache, __wt_data_handle_cache, hashl); + TAILQ_REMOVE(&session->dhandles, dhandle_cache, q); + TAILQ_REMOVE(&session->dhhash[bucket], dhandle_cache, hashq); (void)WT_ATOMIC_SUB4(dhandle_cache->dhandle->session_ref, 1); @@ -261,7 +259,7 @@ 
__wt_session_close_cache(WT_SESSION_IMPL *session) { WT_DATA_HANDLE_CACHE *dhandle_cache; - while ((dhandle_cache = SLIST_FIRST(&session->dhandles)) != NULL) + while ((dhandle_cache = TAILQ_FIRST(&session->dhandles)) != NULL) __session_discard_btree(session, dhandle_cache); } @@ -290,9 +288,9 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session) WT_STAT_FAST_CONN_INCR(session, dh_session_sweeps); - dhandle_cache = SLIST_FIRST(&session->dhandles); + dhandle_cache = TAILQ_FIRST(&session->dhandles); while (dhandle_cache != NULL) { - dhandle_cache_next = SLIST_NEXT(dhandle_cache, l); + dhandle_cache_next = TAILQ_NEXT(dhandle_cache, q); dhandle = dhandle_cache->dhandle; if (dhandle != session->dhandle && dhandle->session_inuse == 0 && @@ -341,7 +339,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, dhandle = NULL; bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE; - SLIST_FOREACH(dhandle_cache, &session->dhhash[bucket], hashl) { + TAILQ_FOREACH(dhandle_cache, &session->dhhash[bucket], hashq) { dhandle = dhandle_cache->dhandle; if (strcmp(uri, dhandle->name) != 0) continue; diff --git a/src/txn/txn_nsnap.c b/src/txn/txn_nsnap.c new file mode 100644 index 00000000000..d0316ecef95 --- /dev/null +++ b/src/txn/txn_nsnap.c @@ -0,0 +1,369 @@ +/*- + * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __nsnap_destroy -- + * Destroy a named snapshot structure. + */ +static void +__nsnap_destroy(WT_SESSION_IMPL *session, WT_NAMED_SNAPSHOT *nsnap) +{ + __wt_free(session, nsnap->name); + __wt_free(session, nsnap->snapshot); + __wt_free(session, nsnap); +} + +/* + * __nsnap_drop_one -- + * Drop a single named snapshot. The named snapshot lock must be held + * write locked. 
+ */ +static int +__nsnap_drop_one(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name) +{ + WT_DECL_RET; + WT_NAMED_SNAPSHOT *found; + WT_TXN_GLOBAL *txn_global; + + txn_global = &S2C(session)->txn_global; + + TAILQ_FOREACH(found, &txn_global->nsnaph, q) + if (WT_STRING_MATCH(found->name, name->str, name->len)) + break; + + if (found == NULL) + return (WT_NOTFOUND); + + /* Bump the global ID if we are removing the first entry */ + if (found == TAILQ_FIRST(&txn_global->nsnaph)) + txn_global->nsnap_oldest_id = (TAILQ_NEXT(found, q) != NULL) ? + TAILQ_NEXT(found, q)->snap_min : WT_TXN_NONE; + TAILQ_REMOVE(&txn_global->nsnaph, found, q); + __nsnap_destroy(session, found); + + return (ret); +} + +/* + * __nsnap_drop_to -- + * Drop named snapshots, if the name is NULL all snapshots will be + * dropped. The named snapshot lock must be held write locked. + */ +static int +__nsnap_drop_to(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, int inclusive) +{ + WT_DECL_RET; + WT_NAMED_SNAPSHOT *last, *nsnap, *prev; + WT_TXN_GLOBAL *txn_global; + uint64_t new_nsnap_oldest; + + last = nsnap = prev = NULL; + txn_global = &S2C(session)->txn_global; + + if (TAILQ_EMPTY(&txn_global->nsnaph)) { + if (name == NULL) + return (0); + /* + * Dropping specific snapshots when there aren't any it's an + * error. + */ + WT_RET_MSG(session, EINVAL, + "Named snapshot '%.*s' for drop not found", + (int)name->len, name->str); + } + + /* + * The new ID will be none if we are removing all named snapshots + * which is the default behavior of this loop. 
+ */ + new_nsnap_oldest = WT_TXN_NONE; + if (name != NULL) { + TAILQ_FOREACH(last, &txn_global->nsnaph, q) { + if (WT_STRING_MATCH(last->name, name->str, name->len)) + break; + prev = last; + } + if (last == NULL) + WT_RET_MSG(session, EINVAL, + "Named snapshot '%.*s' for drop not found", + (int)name->len, name->str); + + if (!inclusive) { + /* We are done if a drop before points to the head */ + if (prev == 0) + return (0); + last = prev; + } + + if (TAILQ_NEXT(last, q) != NULL) + new_nsnap_oldest = TAILQ_NEXT(last, q)->snap_min; + } + + do { + nsnap = TAILQ_FIRST(&txn_global->nsnaph); + WT_ASSERT(session, nsnap != NULL); + TAILQ_REMOVE(&txn_global->nsnaph, nsnap, q); + __nsnap_destroy(session, nsnap); + /* Last will be NULL in the all case so it will never match */ + } while (nsnap != last && !TAILQ_EMPTY(&txn_global->nsnaph)); + + /* Now that the queue of named snapshots is updated, update the ID */ + txn_global->nsnap_oldest_id = new_nsnap_oldest; + + return (ret); +} + +/* + * __wt_txn_named_snapshot_begin -- + * Begin an named in-memory snapshot. + */ +int +__wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONFIG_ITEM cval; + WT_DECL_RET; + WT_NAMED_SNAPSHOT *nsnap, *nsnap_new; + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + const char *txn_cfg[] = + { WT_CONFIG_BASE(session, WT_SESSION_begin_transaction), + "isolation=snapshot", NULL }; + int started_txn; + + started_txn = 0; + nsnap_new = NULL; + txn_global = &S2C(session)->txn_global; + txn = &session->txn; + + WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval)); + WT_ASSERT(session, cval.len != 0); + + if (!F_ISSET(txn, WT_TXN_RUNNING)) { + WT_RET(__wt_txn_begin(session, txn_cfg)); + started_txn = 1; + } + F_SET(txn, WT_TXN_READONLY); + + /* Save a copy of the transaction's snapshot. 
*/ + WT_ERR(__wt_calloc_one(session, &nsnap_new)); + nsnap = nsnap_new; + WT_ERR(__wt_strndup(session, cval.str, cval.len, &nsnap->name)); + nsnap->snap_min = txn->snap_min; + nsnap->snap_max = txn->snap_max; + if (txn->snapshot_count > 0) { + WT_ERR(__wt_calloc_def( + session, txn->snapshot_count, &nsnap->snapshot)); + memcpy(nsnap->snapshot, txn->snapshot, + txn->snapshot_count * sizeof(*nsnap->snapshot)); + } + nsnap->snapshot_count = txn->snapshot_count; + + /* Update the list. */ + + /* + * The semantic is that a new snapshot with the same name as an + * existing snapshot will replace the old one. + */ + WT_ERR_NOTFOUND_OK(__nsnap_drop_one(session, &cval)); + + if (TAILQ_EMPTY(&txn_global->nsnaph)) + txn_global->nsnap_oldest_id = nsnap_new->snap_min; + TAILQ_INSERT_TAIL(&txn_global->nsnaph, nsnap_new, q); + nsnap_new = NULL; + +err: if (started_txn) + WT_TRET(__wt_txn_rollback(session, NULL)); + else if (ret == 0) + F_SET(txn, WT_TXN_NAMED_SNAPSHOT); + + if (nsnap_new != NULL) + __nsnap_destroy(session, nsnap_new); + + return (ret); +} + +/* + * __wt_txn_named_snapshot_drop -- + * Drop named snapshots + */ +int +__wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONFIG objectconf; + WT_CONFIG_ITEM all_config, k, names_config, to_config, before_config, v; + WT_DECL_RET; + + WT_RET(__wt_config_gets_def(session, cfg, "drop.all", 0, &all_config)); + WT_RET(__wt_config_gets_def( + session, cfg, "drop.names", 0, &names_config)); + WT_RET(__wt_config_gets_def(session, cfg, "drop.to", 0, &to_config)); + WT_RET(__wt_config_gets_def( + session, cfg, "drop.before", 0, &before_config)); + + if (all_config.val != 0) + WT_RET(__nsnap_drop_to(session, NULL, 1)); + else if (before_config.len != 0) + WT_RET(__nsnap_drop_to(session, &before_config, 0)); + else if (to_config.len != 0) + WT_RET(__nsnap_drop_to(session, &to_config, 1)); + + /* We are done if there are no named drops */ + + if (names_config.len != 0) { + WT_RET(__wt_config_subinit( + 
session, &objectconf, &names_config)); + while ((ret = __wt_config_next(&objectconf, &k, &v)) == 0) { + ret = __nsnap_drop_one(session, &k); + if (ret != 0) + WT_RET_MSG(session, EINVAL, + "Named snapshot '%.*s' for drop not found", + (int)k.len, k.str); + } + if (ret == WT_NOTFOUND) + ret = 0; + } + + return (ret); +} + +/* + * __wt_txn_named_snapshot_get -- + * Lookup a named snapshot for a transaction. + */ +int +__wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval) +{ + WT_NAMED_SNAPSHOT *nsnap; + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *txn_state; + + txn = &session->txn; + txn_global = &S2C(session)->txn_global; + txn_state = WT_SESSION_TXN_STATE(session); + + txn->isolation = WT_ISO_SNAPSHOT; + if (session->ncursors > 0) + WT_RET(__wt_session_copy_values(session)); + + WT_RET(__wt_readlock(session, txn_global->nsnap_rwlock)); + TAILQ_FOREACH(nsnap, &txn_global->nsnaph, q) + if (WT_STRING_MATCH(nsnap->name, nameval->str, nameval->len)) { + txn->snap_min = txn_state->snap_min = nsnap->snap_min; + txn->snap_max = nsnap->snap_max; + if ((txn->snapshot_count = nsnap->snapshot_count) != 0) + memcpy(txn->snapshot, nsnap->snapshot, + nsnap->snapshot_count * + sizeof(*nsnap->snapshot)); + F_SET(txn, WT_TXN_HAS_SNAPSHOT); + break; + } + WT_RET(__wt_readunlock(session, txn_global->nsnap_rwlock)); + + if (nsnap == NULL) + WT_RET_MSG(session, EINVAL, + "Named snapshot '%.*s' not found", + (int)nameval->len, nameval->str); + + /* Flag that this transaction is opened on a named snapshot */ + F_SET(txn, WT_TXN_NAMED_SNAPSHOT); + + return (0); +} + +/* + * __wt_txn_named_snapshot_config -- + * Check the configuration for a named snapshot + */ +int +__wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, + const char *cfg[], int *has_create, int *has_drops) +{ + WT_CONFIG_ITEM cval; + WT_CONFIG_ITEM all_config, names_config, to_config, before_config; + WT_TXN *txn; + + txn = &session->txn; + *has_create = *has_drops = 0; + + /* 
Verify that the name is legal. */ + WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval)); + if (cval.len != 0) { + if (WT_STRING_MATCH("all", cval.str, cval.len)) + WT_RET_MSG(session, EINVAL, + "Can't create snapshot with reserved \"all\" name"); + + WT_RET(__wt_name_check(session, cval.str, cval.len)); + + if (F_ISSET(txn, WT_TXN_RUNNING) && + txn->isolation != WT_ISO_SNAPSHOT) + WT_RET_MSG(session, EINVAL, + "Can't create a named snapshot from a running " + "transaction that isn't snapshot isolation"); + else if (F_ISSET(txn, WT_TXN_RUNNING) && txn->mod_count != 0) + WT_RET_MSG(session, EINVAL, + "Can't create a named snapshot from a running " + "transaction that has made updates"); + *has_create = 1; + } + + /* Verify that the drop configuration is sane. */ + WT_RET(__wt_config_gets_def(session, cfg, "drop.all", 0, &all_config)); + WT_RET(__wt_config_gets_def( + session, cfg, "drop.names", 0, &names_config)); + WT_RET(__wt_config_gets_def(session, cfg, "drop.to", 0, &to_config)); + WT_RET(__wt_config_gets_def( + session, cfg, "drop.before", 0, &before_config)); + + /* Avoid more work if no drops are configured. 
*/ + if (all_config.val != 0 || names_config.len != 0 || + before_config.len != 0 || to_config.len != 0) { + if (before_config.len != 0 && to_config.len != 0) + WT_RET_MSG(session, EINVAL, + "Illegal configuration; named snapshot drop can't " + "specify both before and to options"); + if (all_config.val != 0 && (names_config.len != 0 || + to_config.len != 0 || before_config.len != 0)) + WT_RET_MSG(session, EINVAL, + "Illegal configuration; named snapshot drop can't " + "specify all and any other options"); + *has_drops = 1; + } + + if (!*has_create && !*has_drops) + WT_RET_MSG(session, EINVAL, + "WT_SESSION::snapshot API called without any drop or " + "name option."); + + return (0); +} + +/* + * __wt_txn_named_snapshot_destroy -- + * Destroy all named snapshots on connection close + */ +int +__wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session) +{ + WT_NAMED_SNAPSHOT *nsnap; + WT_TXN_GLOBAL *txn_global; + + txn_global = &S2C(session)->txn_global; + txn_global->nsnap_oldest_id = WT_TXN_NONE; + + while (!TAILQ_EMPTY(&txn_global->nsnaph)) { + nsnap = TAILQ_FIRST(&txn_global->nsnaph); + WT_ASSERT(session, nsnap != NULL); + TAILQ_REMOVE(&txn_global->nsnaph, nsnap, q); + __nsnap_destroy(session, nsnap); + } + + return (0); +} |