summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@wiredtiger.com> 2014-06-25 15:23:17 +1000
committer: Michael Cahill <michael.cahill@wiredtiger.com> 2014-06-25 15:23:17 +1000
commit: 5ab2ad4a045dee9021bd58298a34825339e2677b (patch)
tree: 55c28d16147d005381e54d4d57cc75f93e953fdf /src
parent: e6e4d4e2724720d23352d8f07ae5c3108d15b8db (diff)
download: mongo-5ab2ad4a045dee9021bd58298a34825339e2677b.tar.gz
Use a split generation number to determine when memory replaced by a split can be freed.
Diffstat (limited to 'src')
-rw-r--r-- src/btree/bt_cursor.c | 18
-rw-r--r-- src/btree/bt_walk.c | 10
-rw-r--r-- src/btree/rec_split.c | 3
-rw-r--r-- src/conn/conn_handle.c | 2
-rw-r--r-- src/include/btree.i | 21
-rw-r--r-- src/include/connection.h | 2
-rw-r--r-- src/include/extern.h | 1
-rw-r--r-- src/include/session.h | 14
-rw-r--r-- src/include/wiredtiger.in | 4
-rw-r--r-- src/session/session_misc.c | 60
-rw-r--r-- src/support/stat.c | 4
11 files changed, 68 insertions, 71 deletions
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index ede7d733bfc..8a667392aeb 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -216,13 +216,14 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
static inline int
__cursor_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- WT_BTREE *prev_btree;
+ uint64_t prev_split_gen;
WT_DECL_RET;
- prev_btree = session->active_btree;
- WT_PUBLISH(session->active_btree, cbt->btree);
+ if ((prev_split_gen = session->split_gen) == 0)
+ WT_PUBLISH(session->split_gen, S2C(session)->split_gen);
ret = __wt_col_search(session, cbt->iface.recno, NULL, cbt);
- session->active_btree = prev_btree;
+ if (prev_split_gen == 0)
+ session->split_gen = 0;
return (ret);
}
@@ -233,13 +234,14 @@ __cursor_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
static inline int
__cursor_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int insert)
{
- WT_BTREE *prev_btree;
+ uint64_t prev_split_gen;
WT_DECL_RET;
- prev_btree = session->active_btree;
- WT_PUBLISH(session->active_btree, cbt->btree);
+ if ((prev_split_gen = session->split_gen) == 0)
+ WT_PUBLISH(session->split_gen, S2C(session)->split_gen);
ret = __wt_row_search(session, &cbt->iface.key, NULL, cbt, insert);
- session->active_btree = prev_btree;
+ if (prev_split_gen == 0)
+ session->split_gen = 0;
return (ret);
}
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index f5a93210a26..ed9992b89ab 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -14,12 +14,13 @@
int
__wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
{
- WT_BTREE *btree, *prev_tree;
+ WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_INDEX *pindex;
WT_REF *couple, *ref;
WT_TXN_STATE *txn_state;
+ uint64_t prev_split_gen;
int descending, prev, skip;
uint32_t slot;
@@ -31,8 +32,8 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
* may want to free. Publish that the tree is active during this
* window.
*/
- prev_tree = session->active_btree;
- WT_PUBLISH(session->active_btree, btree);
+ if ((prev_split_gen = session->split_gen) == 0)
+ WT_PUBLISH(session->split_gen, S2C(session)->split_gen);
/*
* !!!
@@ -279,6 +280,7 @@ descend: couple = ref;
done:
err: if (txn_state != NULL)
txn_state->snap_min = WT_TXN_NONE;
- session->active_btree = prev_tree;
+ if (prev_split_gen == 0)
+ session->split_gen = 0;
return (ret);
}
diff --git a/src/btree/rec_split.c b/src/btree/rec_split.c
index 8296e19542d..6d80924896c 100644
--- a/src/btree/rec_split.c
+++ b/src/btree/rec_split.c
@@ -31,7 +31,8 @@ __safe_free(
* We have swapped something in a page: if we don't have exclusive
* access, check whether there are other threads in the same tree.
*/
- if (!exclusive && __wt_btree_exclusive(session, S2BT(session)))
+ if (!exclusive &&
+ __wt_oldest_split_gen(session) == S2C(session)->split_gen + 1)
exclusive = 1;
if (exclusive) {
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index a6c52678fbf..017658bafe4 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -28,6 +28,8 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
TAILQ_INIT(&conn->lsmqh); /* WT_LSM_TREE list */
+ conn->split_gen = 1;
+
/* Configuration. */
WT_RET(__wt_conn_config_init(session));
diff --git a/src/include/btree.i b/src/include/btree.i
index 7d71bfdb400..fb686b7bce7 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -937,27 +937,6 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
- * __wt_btree_exclusive --
- * Check if there is another thread active in this tree.
- */
-static inline int
-__wt_btree_exclusive(WT_SESSION_IMPL *session, WT_BTREE *btree)
-{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *s;
- uint32_t i, session_cnt;
-
- conn = S2C(session);
-
- /* No lock is required because the session array is fixed size. */
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i)
- if (s->active_btree == btree)
- return (0);
- return (1);
-}
-
-/*
* __wt_skip_choose_depth --
* Randomly choose a depth for a skiplist insert.
*/
diff --git a/src/include/connection.h b/src/include/connection.h
index 245308e4a6c..f8ae7930aca 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -109,6 +109,8 @@ struct __wt_connection_impl {
pthread_t cache_evict_tid; /* Eviction server thread ID */
int cache_evict_tid_set; /* Eviction server thread ID set */
+ uint64_t split_gen; /* Generation number for splits */
+
WT_SPINLOCK dhandle_lock; /* Locked: dhandle sweep */
/* Locked: data handle list */
SLIST_HEAD(__wt_dhandle_lh, __wt_data_handle) dhlh;
diff --git a/src/include/extern.h b/src/include/extern.h
index fd717e893ac..584b369abb3 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -1387,6 +1387,7 @@ extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session,
const char *checkpoint);
extern void __wt_session_discard_btree( WT_SESSION_IMPL *session,
WT_DATA_HANDLE_CACHE *dhandle_cache);
+extern uint64_t __wt_oldest_split_gen(WT_SESSION_IMPL *session);
extern int __wt_session_fotxn_add(WT_SESSION_IMPL *session,
void *p,
size_t len);
diff --git a/src/include/session.h b/src/include/session.h
index 619fc970d80..133f8ab107e 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -129,21 +129,21 @@ struct __wt_session_impl {
/*
* Sessions can "free" memory that may still be in use, and we use a
- * transactional generation to track it, that is, the session stores
- * a reference to the memory and a current transaction ID; when the
- * oldest transaction ID has moved beyond that point, the memory can
- * be discarded for real.
+ * split generation number to track it, that is, the session stores a
+ * reference to the memory and allocates a split generation; when no
+ * session is reading from that split generation, the memory can be
+ * discarded for real.
*/
- WT_BTREE *active_btree;
struct __wt_fotxn {
- WT_BTREE *btree; /* Tree that owned the memory */
- uint64_t txnid; /* Transaction ID */
+ uint64_t split_gen; /* Tree generation */
void *p; /* Memory, length */
size_t len;
} *fotxn; /* Free-on-transaction array */
size_t fotxn_cnt; /* Array entries */
size_t fotxn_size; /* Array size */
+ uint64_t split_gen; /* Reading split generation */
+
/*
* Hazard pointers.
* The number of hazard pointers that can be in use grows dynamically.
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index d8d529a9d7c..3b793113e28 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -3210,9 +3210,9 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_REC_PAGES_EVICTION 1081
/*! reconciliation failed because an update could not be included */
#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1082
-/*! split bytes awaiting free */
+/*! split bytes stashed */
#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1083
-/*! split objects awaiting free */
+/*! split objects stashed */
#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1084
/*! pthread mutex shared lock read-lock calls */
#define WT_STAT_CONN_RWLOCK_READ 1085
diff --git a/src/session/session_misc.c b/src/session/session_misc.c
index 9aab0f8730e..c6f21fc9243 100644
--- a/src/session/session_misc.c
+++ b/src/session/session_misc.c
@@ -8,6 +8,29 @@
#include "wt_internal.h"
/*
+ * __wt_oldest_split_gen --
+ * Calculate the oldest active split generation.
+ */
+uint64_t
+__wt_oldest_split_gen(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *s;
+ uint64_t gen, oldest;
+ u_int i, session_cnt;
+
+ conn = S2C(session);
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (i = 0, s = conn->sessions, oldest = conn->split_gen + 1;
+ i < session_cnt;
+ i++, s++)
+ if (((gen = s->split_gen) != 0) && gen < oldest)
+ oldest = gen;
+
+ return (oldest);
+}
+
+/*
* __wt_session_fotxn_add --
* Add a new entry into the session's free-on-transaction generation list.
*/
@@ -16,22 +39,14 @@ __wt_session_fotxn_add(WT_SESSION_IMPL *session, void *p, size_t len)
{
WT_FOTXN *fotxn;
- /*
- * Make sure the current thread has a transaction pinned so that
- * we don't immediately free the memory we are stashing.
- */
- WT_ASSERT(session,
- WT_SESSION_TXN_STATE(session)->snap_min != WT_TXN_NONE);
+ WT_ASSERT(session, p != NULL);
/* Grow the list as necessary. */
WT_RET(__wt_realloc_def(session,
&session->fotxn_size, session->fotxn_cnt + 1, &session->fotxn));
fotxn = session->fotxn + session->fotxn_cnt++;
- fotxn->btree = S2BT(session);
- fotxn->txnid = __wt_txn_current_id(session) + 1;
- WT_ASSERT(session, !__wt_txn_visible_all(session, fotxn->txnid));
- WT_ASSERT(session, fotxn->p == NULL);
+ fotxn->split_gen = WT_ATOMIC_ADD(S2C(session)->split_gen, 1);
fotxn->p = p;
fotxn->len = len;
@@ -53,26 +68,19 @@ __wt_session_fotxn_add(WT_SESSION_IMPL *session, void *p, size_t len)
void
__wt_session_fotxn_discard(WT_SESSION_IMPL *session)
{
- WT_BTREE *prev_btree;
WT_FOTXN *fotxn;
+ uint64_t oldest;
size_t i;
- /* The last known tree that wasn't busy. */
- prev_btree = NULL;
-
- /* Bump the oldest transaction ID. */
- __wt_txn_update_oldest(session);
+ /* Get the oldest split generation. */
+ oldest = __wt_oldest_split_gen(session);
for (i = 0, fotxn = session->fotxn;
i < session->fotxn_cnt;
++i, ++fotxn) {
if (fotxn->p == NULL)
continue;
- else if (fotxn->btree == prev_btree)
- ;
- else if (__wt_btree_exclusive(session, fotxn->btree))
- prev_btree = fotxn->btree;
- else if (!__wt_txn_visible_all(session, fotxn->txnid))
+ else if (fotxn->split_gen >= oldest)
break;
/*
* It's a bad thing if another thread is in this memory
@@ -90,12 +98,12 @@ __wt_session_fotxn_discard(WT_SESSION_IMPL *session)
* If there are enough free slots at the beginning of the list, shuffle
* everything down.
*/
- if ((i > 100 || i == session->fotxn_cnt) &&
- (session->fotxn_cnt -= i) > 0) {
- memmove(session->fotxn, session->fotxn + i,
- session->fotxn_cnt * sizeof(session->fotxn[0]));
+ if (i > 100 || i == session->fotxn_cnt) {
+ if ((session->fotxn_cnt -= i) > 0)
+ memmove(session->fotxn, fotxn,
+ session->fotxn_cnt * sizeof(*fotxn));
memset(session->fotxn + session->fotxn_cnt, 0,
- i * sizeof(session->fotxn[0]));
+ i * sizeof(*fotxn));
}
}
diff --git a/src/support/stat.c b/src/support/stat.c
index ed22a214dd9..f1eb8236c0f 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -413,8 +413,8 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"page reconciliation calls for eviction";
stats->rec_skipped_update.desc =
"reconciliation failed because an update could not be included";
- stats->rec_split_stashed_bytes.desc = "split bytes awaiting free";
- stats->rec_split_stashed_objects.desc = "split objects awaiting free";
+ stats->rec_split_stashed_bytes.desc = "split bytes stashed";
+ stats->rec_split_stashed_objects.desc = "split objects stashed";
stats->rwlock_read.desc = "pthread mutex shared lock read-lock calls";
stats->rwlock_write.desc =
"pthread mutex shared lock write-lock calls";