author    Michael Cahill <michael.cahill@wiredtiger.com>  2012-09-13 14:32:42 +1000
committer Michael Cahill <michael.cahill@wiredtiger.com>  2012-09-13 14:32:42 +1000
commit    3bb9917005e9418a2ab0ad08702e3b82de431e38 (patch)
tree      44422e365f802820a350ab33c76c35d32171e572
parent    25e5d7d4d2c8b4cb4dec77b0251a19150e859af3 (diff)
Track a pointer to the primary chunk in LSM cursors.

This avoids a messy loop to find the primary. It requires switching the arrays in WT_LSM_TREE to arrays of pointers, so that LSM chunks don't move around underneath a cursor.
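
A minimal sketch of the idea (hypothetical types and helper names, not the WiredTiger code): the cursor remembers its primary chunk directly and holds a reference count on it, so detaching never has to search the chunk list by URI. The GCC builtins stand in for WT_ATOMIC_ADD/WT_ATOMIC_SUB.

#include <stddef.h>

struct chunk {
	const char *uri;
	int ncursor;			/* cursors pinning this chunk */
};

struct lsm_cursor {
	struct chunk *primary_chunk;	/* direct pointer, no URI lookup */
};

static void
cursor_attach_primary(struct lsm_cursor *c, struct chunk *primary)
{
	c->primary_chunk = primary;
	(void)__sync_fetch_and_add(&primary->ncursor, 1);
}

static void
cursor_detach_primary(struct lsm_cursor *c)
{
	if (c->primary_chunk != NULL) {
		(void)__sync_fetch_and_sub(&c->primary_chunk->ncursor, 1);
		c->primary_chunk = NULL;
	}
}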
-rw-r--r--   src/include/lsm.h      6
-rw-r--r--   src/lsm/lsm_cursor.c   52
-rw-r--r--   src/lsm/lsm_merge.c    83
-rw-r--r--   src/lsm/lsm_meta.c     20
-rw-r--r--   src/lsm/lsm_tree.c     17
-rw-r--r--   src/lsm/lsm_worker.c   19
6 files changed, 106 insertions(+), 91 deletions(-)
diff --git a/src/include/lsm.h b/src/include/lsm.h
index fce336f8806..6c4406540fd 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -16,6 +16,8 @@ struct __wt_cursor_lsm {
WT_CURSOR **cursors;
WT_CURSOR *current; /* The current cursor for iteration */
+ WT_LSM_CHUNK *primary_chunk; /* The current primary chunk. */
+
#define WT_CLSM_ITERATE_NEXT 0x01 /* Forward iteration */
#define WT_CLSM_ITERATE_PREV 0x02 /* Backward iteration */
#define WT_CLSM_MERGE 0x04 /* Merge cursor, don't update. */
@@ -58,9 +60,9 @@ struct __wt_lsm_tree {
int nchunks; /* Number of active chunks */
int last; /* Last allocated ID. */
- WT_LSM_CHUNK *chunk; /* Array of active LSM chunks */
+ WT_LSM_CHUNK **chunk; /* Array of active LSM chunks */
size_t chunk_alloc; /* Space allocated for chunks */
- WT_LSM_CHUNK *old_chunks; /* Array of old LSM chunks */
+ WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */
size_t old_alloc; /* Space allocated for old chunks */
int nold_chunks; /* Number of old chunks */
int old_avail; /* Available old chunk slots */
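
Why the switch to arrays of pointers matters: when the chunk array is grown, an array of structs can move in memory and leave any WT_LSM_CHUNK * held by a cursor dangling, while an array of pointers moves only the pointer table and each chunk stays put. A self-contained sketch of that pattern in standard C (hypothetical names; the real code grows the array with __wt_realloc):

#include <stdlib.h>
#include <string.h>

struct chunk { int id; };	/* hypothetical stand-in for WT_LSM_CHUNK */

/*
 * Grow the pointer table: existing chunks never move, so a pointer to any
 * individual chunk taken before the realloc remains valid after it.
 */
static int
grow_chunk_array(struct chunk ***chunksp, size_t *nallocp, size_t need)
{
	struct chunk **tmp;

	if (need <= *nallocp)
		return (0);
	if ((tmp = realloc(*chunksp, need * sizeof(*tmp))) == NULL)
		return (-1);
	memset(tmp + *nallocp, 0, (need - *nallocp) * sizeof(*tmp));
	*chunksp = tmp;
	*nallocp = need;
	return (0);
}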
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index ce0a58c51e1..0ea323e5479 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -88,14 +88,21 @@ __clsm_close_cursors(WT_CURSOR_LSM *clsm)
WT_CURSOR *c;
int i;
- if (clsm->cursors != NULL) {
- FORALL_CURSORS(clsm, c, i) {
- clsm->cursors[i] = NULL;
- WT_RET(c->close(c));
- if ((bloom = clsm->blooms[i]) != NULL) {
- clsm->blooms[i] = NULL;
- WT_RET(__wt_bloom_close(bloom));
- }
+ if (clsm->cursors == NULL)
+ return (0);
+
+ /* Detach from our old primary. */
+ if (clsm->primary_chunk != NULL) {
+ WT_ATOMIC_SUB(clsm->primary_chunk->ncursor, 1);
+ clsm->primary_chunk = NULL;
+ }
+
+ FORALL_CURSORS(clsm, c, i) {
+ clsm->cursors[i] = NULL;
+ WT_RET(c->close(c));
+ if ((bloom = clsm->blooms[i]) != NULL) {
+ clsm->blooms[i] = NULL;
+ WT_RET(__wt_bloom_close(bloom));
}
}
@@ -116,7 +123,6 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm)
WT_LSM_TREE *lsm_tree;
WT_SESSION_IMPL *session;
const char *ckpt_cfg[] = { "checkpoint=WiredTigerCheckpoint", NULL };
- const char *primary_uri;
int i, nchunks;
session = (WT_SESSION_IMPL *)clsm->iface.session;
@@ -131,31 +137,9 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm)
F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
}
- /*
- * Take a copy of the primary cursor URI. We're about to close the
- * cursor, but this pointer will stay valid because it is owned by the
- * underlying btree handle.
- */
- if (clsm->cursors != NULL && clsm->cursors[clsm->nchunks - 1] != NULL)
- primary_uri = clsm->cursors[clsm->nchunks - 1]->uri;
- else
- primary_uri = NULL;
WT_RET(__clsm_close_cursors(clsm));
__wt_spin_lock(session, &lsm_tree->lock);
- /* Detach from our old primary. */
- if (primary_uri != NULL) {
- for (i = lsm_tree->nchunks - 1; i >= 0; i--) {
- chunk = &lsm_tree->chunk[i];
- if (strcmp(primary_uri, chunk->uri) == 0) {
- --chunk->ncursor;
- break;
- }
- }
- /* We must find the primary: it can't have gone anywhere yet. */
- WT_ASSERT(session, i != -1);
- }
-
/* Merge cursors have already figured out how many chunks they need. */
if (F_ISSET(clsm, WT_CLSM_MERGE))
nchunks = clsm->nchunks;
@@ -175,7 +159,7 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm)
* Read from the checkpoint if the file has been written.
* Once all cursors switch, the in-memory tree can be evicted.
*/
- chunk = &lsm_tree->chunk[i];
+ chunk = lsm_tree->chunk[i];
WT_ERR(__wt_curfile_open(session,
chunk->uri, &clsm->iface,
F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ? ckpt_cfg : NULL, cp));
@@ -192,7 +176,9 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm)
WT_ASSERT(session,
!F_ISSET(clsm, WT_CLSM_UPDATED) ||
!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK));
- ++chunk->ncursor;
+
+ clsm->primary_chunk = chunk;
+ WT_ATOMIC_ADD(clsm->primary_chunk->ncursor, 1);
/* Peek into the btree layer to track the in-memory size. */
if (lsm_tree->memsizep == NULL)
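
For reference, opening an on-disk chunk from its checkpoint has a public-API equivalent; the internal code above uses __wt_curfile_open with the same ckpt_cfg string. A sketch (the helper name is hypothetical):

#include <wiredtiger.h>

/* Open a cursor on the most recent checkpoint of an on-disk chunk. */
static int
open_chunk_checkpoint(WT_SESSION *session, const char *uri, WT_CURSOR **cursorp)
{
	return (session->open_cursor(
	    session, uri, NULL, "checkpoint=WiredTigerCheckpoint", cursorp));
}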
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 6c35b92247d..c9973ca7805 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -8,6 +8,53 @@
#include "wt_internal.h"
/*
+ * __lsm_merge_update_tree --
+ * Merge a set of chunks and create a new one.
+ * Must be called with the LSM lock held.
+ */
+static int
+__lsm_merge_update_tree(WT_SESSION_IMPL *session,
+ WT_LSM_TREE *lsm_tree, int nchunks, WT_LSM_CHUNK **chunkp)
+{
+ WT_LSM_CHUNK *chunk;
+ size_t chunk_sz;
+ int i, j;
+
+ if (nchunks > lsm_tree->old_avail) {
+ chunk_sz = sizeof(*lsm_tree->old_chunks);
+ WT_RET(__wt_realloc(session,
+ &lsm_tree->old_alloc,
+ chunk_sz * WT_MAX(10, lsm_tree->nold_chunks + 2 * nchunks),
+ &lsm_tree->old_chunks));
+ lsm_tree->old_avail += (int)(lsm_tree->old_alloc / chunk_sz) -
+ lsm_tree->nold_chunks;
+ lsm_tree->nold_chunks = (int)(lsm_tree->old_alloc / chunk_sz);
+ }
+ /* Copy entries one at a time, so we can reuse gaps in the list. */
+ for (i = j = 0; j < nchunks && i < lsm_tree->nold_chunks; i++) {
+ if (lsm_tree->old_chunks[i] == NULL) {
+ lsm_tree->old_chunks[i] = lsm_tree->chunk[j++];
+ --lsm_tree->old_avail;
+ }
+ }
+
+ WT_ASSERT(session, j == nchunks);
+
+ /* Update the current chunk list. */
+ memmove(lsm_tree->chunk + 1, lsm_tree->chunk + nchunks,
+ (lsm_tree->nchunks - nchunks) * sizeof(*lsm_tree->chunk));
+ lsm_tree->nchunks -= nchunks - 1;
+ memset(lsm_tree->chunk + lsm_tree->nchunks, 0,
+ (nchunks - 1) * sizeof(*lsm_tree->chunk));
+ WT_RET(__wt_calloc_def(session, 1, &chunk));
+ lsm_tree->chunk[0] = chunk;
+ lsm_tree->dsk_gen++;
+
+ *chunkp = chunk;
+ return (0);
+}
+
+/*
* __wt_lsm_major_merge --
* Merge a set of chunks of an LSM tree including the oldest.
*/
@@ -23,8 +70,7 @@ __wt_lsm_major_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_SESSION *wt_session;
const char *dest_uri;
uint64_t record_count;
- size_t chunk_sz;
- int dest_id, i, j, nchunks;
+ int dest_id, nchunks;
src = dest = NULL;
@@ -55,8 +101,8 @@ __wt_lsm_major_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
*/
nchunks = WT_MIN((int)S2C(session)->hazard_size / 2, nchunks);
while (nchunks > 1 &&
- (!F_ISSET(&lsm_tree->chunk[nchunks - 1], WT_LSM_CHUNK_ONDISK) ||
- lsm_tree->chunk[nchunks - 1].ncursor > 0))
+ (!F_ISSET(lsm_tree->chunk[nchunks - 1], WT_LSM_CHUNK_ONDISK) ||
+ lsm_tree->chunk[nchunks - 1]->ncursor > 0))
--nchunks;
if (nchunks <= 1)
@@ -112,40 +158,15 @@ __wt_lsm_major_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_ERR(ret);
__wt_spin_lock(session, &lsm_tree->lock);
- if (nchunks > lsm_tree->old_avail) {
- chunk_sz = sizeof(*lsm_tree->old_chunks);
- WT_ERR(__wt_realloc(session,
- &lsm_tree->old_alloc,
- chunk_sz * WT_MAX(10, lsm_tree->nold_chunks + 2 * nchunks),
- &lsm_tree->old_chunks));
- lsm_tree->old_avail += (int)(lsm_tree->old_alloc / chunk_sz) -
- lsm_tree->nold_chunks;
- lsm_tree->nold_chunks = (int)(lsm_tree->old_alloc / chunk_sz);
- }
- /* Copy entries one at a time, so we can reuse gaps in the list. */
- for (i = j = 0; j < nchunks && i < lsm_tree->nold_chunks; i++) {
- if (lsm_tree->old_chunks[i].uri == NULL) {
- lsm_tree->old_chunks[i] = lsm_tree->chunk[j++];
- --lsm_tree->old_avail;
- }
- }
+ ret = __lsm_merge_update_tree(session, lsm_tree, nchunks, &chunk);
- WT_ASSERT(session, j == nchunks);
-
- /* Update the current chunk list. */
- memmove(lsm_tree->chunk + 1, lsm_tree->chunk + nchunks,
- (lsm_tree->nchunks - nchunks) * sizeof(*lsm_tree->chunk));
- lsm_tree->nchunks -= nchunks - 1;
- chunk = &lsm_tree->chunk[0];
- WT_CLEAR(*chunk);
chunk->uri = dest_uri;
dest_uri = NULL;
- chunk->bloom_uri = __wt_buf_steal(session, bbuf, NULL);
+ chunk->bloom_uri = __wt_buf_steal(session, bbuf, 0);
chunk->count = record_count;
F_SET(chunk, WT_LSM_CHUNK_ONDISK);
ret = __wt_lsm_meta_write(session, lsm_tree);
- lsm_tree->dsk_gen++;
__wt_spin_unlock(session, &lsm_tree->lock);
printf("Merge done\n");
diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c
index 2d4858046da..4259b6644f3 100644
--- a/src/lsm/lsm_meta.c
+++ b/src/lsm/lsm_meta.c
@@ -50,7 +50,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_ERR(__wt_config_subinit(session, &lparser, &cv));
chunk_sz = sizeof(*lsm_tree->chunk);
for (nchunks = 0; (ret =
- __wt_config_next( &lparser, &lk, &lv)) == 0; ) {
+ __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
WT_ERR(__wt_strndup(session,
lv.str, lv.len, &chunk->bloom_uri));
@@ -67,20 +67,19 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_MAX(10 * chunk_sz,
2 * lsm_tree->chunk_alloc),
&lsm_tree->chunk));
- chunk = &lsm_tree->chunk[nchunks];
+ WT_ERR(__wt_calloc_def(session, 1, &chunk));
+ lsm_tree->chunk[nchunks++] = chunk;
WT_ERR(__wt_strndup(session,
lk.str, lk.len, &chunk->uri));
chunk->flags = WT_LSM_CHUNK_ONDISK;
- nchunks++;
}
WT_ERR_NOTFOUND_OK(ret);
lsm_tree->nchunks = nchunks;
} else if (WT_STRING_MATCH("old_chunks", ck.str, ck.len)) {
WT_ERR(__wt_config_subinit(session, &lparser, &cv));
chunk_sz = sizeof(*lsm_tree->old_chunks);
- for (nchunks = 0;
- (ret = __wt_config_next( &lparser, &lk, &lv)) == 0;
- nchunks++) {
+ for (nchunks = 0; (ret =
+ __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
if ((nchunks + 1) * chunk_sz >
lsm_tree->old_avail * chunk_sz) {
alloc = lsm_tree->old_alloc;
@@ -96,7 +95,8 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
((lsm_tree->old_alloc - alloc) /
chunk_sz);
}
- chunk = &lsm_tree->old_chunks[nchunks];
+ WT_ERR(__wt_calloc_def(session, 1, &chunk));
+ lsm_tree->old_chunks[nchunks++] = chunk;
WT_ERR(__wt_strndup(session,
lk.str, lk.len, &chunk->uri));
chunk->flags = WT_LSM_CHUNK_ONDISK;
@@ -139,7 +139,7 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
lsm_tree->bloom_factor, lsm_tree->bloom_k));
WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=["));
for (i = 0; i < lsm_tree->nchunks; i++) {
- chunk = &lsm_tree->chunk[i];
+ chunk = lsm_tree->chunk[i];
if (i > 0)
WT_ERR(__wt_buf_catfmt(session, buf, ","));
WT_ERR(__wt_buf_catfmt(session, buf, "\"%s\"", chunk->uri));
@@ -154,8 +154,8 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_ERR(__wt_buf_catfmt(session, buf, ",old_chunks=["));
first = 1;
for (i = 0; i < (int)lsm_tree->nold_chunks; i++) {
- chunk = &lsm_tree->old_chunks[i];
- if (chunk->uri == NULL)
+ chunk = lsm_tree->old_chunks[i];
+ if (chunk == NULL)
continue;
if (first)
first = 0;
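
When the metadata is read back, each chunk is now a separate allocation dropped into the pointer array rather than a slot in a struct array. A minimal sketch of that pattern in standard C (hypothetical names; the real code uses __wt_calloc_def, __wt_strndup and __wt_realloc):

#include <stdlib.h>
#include <string.h>

struct chunk { char *uri; int flags; };	/* hypothetical stand-in */
#define CHUNK_ONDISK 0x01

static int
add_chunk_from_meta(struct chunk ***chunksp, int *nchunksp, const char *uri)
{
	struct chunk **tmp, *chunk;

	if ((tmp = realloc(*chunksp,
	    ((size_t)*nchunksp + 1) * sizeof(*tmp))) == NULL)
		return (-1);
	*chunksp = tmp;
	if ((chunk = calloc(1, sizeof(*chunk))) == NULL)
		return (-1);
	if ((chunk->uri = strdup(uri)) == NULL) {
		free(chunk);
		return (-1);
	}
	chunk->flags = CHUNK_ONDISK;
	tmp[(*nchunksp)++] = chunk;
	return (0);
}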
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index cf9f4d81421..19a95656a9d 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -14,6 +14,7 @@
static void
__lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
+ WT_LSM_CHUNK *chunk;
int i;
TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q);
@@ -21,8 +22,12 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
__wt_free(session, lsm_tree->name);
for (i = 0; i < lsm_tree->nchunks; i++) {
- __wt_free(session, lsm_tree->chunk[i].bloom_uri);
- __wt_free(session, lsm_tree->chunk[i].uri);
+ if ((chunk = lsm_tree->chunk[i]) == NULL)
+ continue;
+
+ __wt_free(session, chunk->bloom_uri);
+ __wt_free(session, chunk->uri);
+ __wt_free(session, chunk);
}
__wt_free(session, lsm_tree->chunk);
@@ -288,8 +293,8 @@ __wt_lsm_tree_switch(
2 * lsm_tree->chunk_alloc),
&lsm_tree->chunk));
- chunk = &lsm_tree->chunk[lsm_tree->nchunks++];
- WT_CLEAR(*chunk);
+ WT_ERR(__wt_calloc_def(session, 1, &chunk));
+ lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
WT_ERR(__wt_lsm_tree_create_chunk(session,
lsm_tree, WT_ATOMIC_ADD(lsm_tree->last, 1),
&chunk->uri));
@@ -320,7 +325,7 @@ __wt_lsm_tree_drop(
/* Drop the chunks. */
for (i = 0; i < lsm_tree->nchunks; i++) {
- chunk = &lsm_tree->chunk[i];
+ chunk = lsm_tree->chunk[i];
WT_ERR(__wt_schema_drop(session, chunk->uri, cfg));
if (chunk->bloom_uri != NULL)
WT_ERR(
@@ -353,7 +358,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
WT_RET(__wt_lsm_tree_get(session, uri, &lsm_tree));
for (i = 0; i < lsm_tree->nchunks; i++) {
- chunk = &lsm_tree->chunk[i];
+ chunk = lsm_tree->chunk[i];
if (func == __wt_checkpoint &&
F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
continue;
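
Because chunks are now individually allocated, __lsm_tree_discard has to free each chunk structure as well as the strings it owns. The shape of that loop, as a sketch in plain C (hypothetical names):

#include <stdlib.h>

struct chunk { char *uri; char *bloom_uri; };	/* hypothetical stand-in */

static void
discard_chunks(struct chunk **chunks, int nchunks)
{
	int i;

	for (i = 0; i < nchunks; i++) {
		if (chunks[i] == NULL)
			continue;
		free(chunks[i]->bloom_uri);
		free(chunks[i]->uri);
		free(chunks[i]);
	}
	free(chunks);
}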
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index 78a2dcf9041..b73b96ff2fc 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -19,7 +19,7 @@ void *
__wt_lsm_worker(void *arg)
{
WT_DECL_RET;
- WT_LSM_CHUNK *chunk, *chunk_array;
+ WT_LSM_CHUNK *chunk, **chunk_array;
WT_LSM_TREE *lsm_tree;
WT_SESSION_IMPL *session;
const char *cfg[] = { "name=,drop=", NULL };
@@ -46,7 +46,7 @@ __wt_lsm_worker(void *arg)
* to merge operations.
*/
for (nchunks = lsm_tree->nchunks - 1;
- nchunks > 0 && lsm_tree->chunk[nchunks - 1].ncursor > 0;
+ nchunks > 0 && lsm_tree->chunk[nchunks - 1]->ncursor > 0;
--nchunks)
;
if (chunk_alloc < lsm_tree->chunk_alloc)
@@ -63,7 +63,8 @@ __wt_lsm_worker(void *arg)
* Write checkpoints in all completed files, then find
* something to merge.
*/
- for (i = 0, chunk = chunk_array; i < nchunks; i++, chunk++) {
+ for (i = 0; i < nchunks; i++) {
+ chunk = chunk_array[i];
if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ||
chunk->ncursor > 0)
continue;
@@ -78,7 +79,7 @@ __wt_lsm_worker(void *arg)
__wt_checkpoint, cfg, 0));
if (ret == 0) {
__wt_spin_lock(session, &lsm_tree->lock);
- F_SET(&lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK);
+ F_SET(lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK);
lsm_tree->dsk_gen++;
__wt_spin_unlock(session, &lsm_tree->lock);
progress = 1;
@@ -111,15 +112,16 @@ static int
__lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
WT_DECL_RET;
- char *uri;
+ WT_LSM_CHUNK *chunk;
+ const char *uri;
const char *drop_cfg[] = { NULL };
int found, i;
found = 0;
for (i = 0; i < lsm_tree->nold_chunks; i++) {
- uri = (char *)lsm_tree->old_chunks[i].uri;
- if (uri == NULL)
+ if ((chunk = lsm_tree->old_chunks[i]) == NULL)
continue;
+ uri = chunk->uri;
if (!found) {
found = 1;
/* TODO: Do we need the lsm_tree lock for all drops? */
@@ -137,8 +139,7 @@ __lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (ret != 0)
goto err;
__wt_free(session, uri);
- memset(
- &lsm_tree->old_chunks[i], 0, sizeof(*lsm_tree->old_chunks));
+ __wt_free(session, lsm_tree->old_chunks[i]);
++lsm_tree->old_avail;
}
if (found) {
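
Finally, __lsm_free_chunks now marks an old slot empty by freeing the chunk and leaving NULL behind (which the merge path reuses as a gap), instead of memset()ing a struct in place. A sketch of that pattern (hypothetical names, the schema drop of the underlying table omitted):

#include <stdlib.h>

struct chunk { char *uri; };	/* hypothetical stand-in */

static void
free_old_chunk(struct chunk **old, int i, int *availp)
{
	if (old[i] == NULL)
		return;
	free(old[i]->uri);
	free(old[i]);
	old[i] = NULL;
	++(*availp);
}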