diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-09-13 14:32:42 +1000 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-09-13 14:32:42 +1000 |
commit | 3bb9917005e9418a2ab0ad08702e3b82de431e38 (patch) | |
tree | 44422e365f802820a350ab33c76c35d32171e572 | |
parent | 25e5d7d4d2c8b4cb4dec77b0251a19150e859af3 (diff) | |
download | mongo-3bb9917005e9418a2ab0ad08702e3b82de431e38.tar.gz |
Track a pointer to the primary chunk in LSM cursors.
This avoids a messy loop to find the primary, and requires the arrays in
WT_LSM_TREE to switch to arrays of pointers, so that LSM chunks don't move
around underneath a cursor.
-rw-r--r-- | src/include/lsm.h | 6 | ||||
-rw-r--r-- | src/lsm/lsm_cursor.c | 52 | ||||
-rw-r--r-- | src/lsm/lsm_merge.c | 83 | ||||
-rw-r--r-- | src/lsm/lsm_meta.c | 20 | ||||
-rw-r--r-- | src/lsm/lsm_tree.c | 17 | ||||
-rw-r--r-- | src/lsm/lsm_worker.c | 19 |
6 files changed, 106 insertions, 91 deletions
diff --git a/src/include/lsm.h b/src/include/lsm.h
index fce336f8806..6c4406540fd 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -16,6 +16,8 @@ struct __wt_cursor_lsm {
 	WT_CURSOR **cursors;
 	WT_CURSOR *current; /* The current cursor for iteration */
 
+	WT_LSM_CHUNK *primary_chunk; /* The current primary chunk. */
+
 #define WT_CLSM_ITERATE_NEXT 0x01 /* Forward iteration */
 #define WT_CLSM_ITERATE_PREV 0x02 /* Backward iteration */
 #define WT_CLSM_MERGE 0x04 /* Merge cursor, don't update. */
@@ -58,9 +60,9 @@ struct __wt_lsm_tree {
 
 	int nchunks; /* Number of active chunks */
 	int last; /* Last allocated ID. */
-	WT_LSM_CHUNK *chunk; /* Array of active LSM chunks */
+	WT_LSM_CHUNK **chunk; /* Array of active LSM chunks */
 	size_t chunk_alloc; /* Space allocated for chunks */
-	WT_LSM_CHUNK *old_chunks; /* Array of old LSM chunks */
+	WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */
 	size_t old_alloc; /* Space allocated for old chunks */
 	int nold_chunks; /* Number of old chunks */
 	int old_avail; /* Available old chunk slots */
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index ce0a58c51e1..0ea323e5479 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -88,14 +88,21 @@ __clsm_close_cursors(WT_CURSOR_LSM *clsm)
 	WT_CURSOR *c;
 	int i;
 
-	if (clsm->cursors != NULL) {
-		FORALL_CURSORS(clsm, c, i) {
-			clsm->cursors[i] = NULL;
-			WT_RET(c->close(c));
-			if ((bloom = clsm->blooms[i]) != NULL) {
-				clsm->blooms[i] = NULL;
-				WT_RET(__wt_bloom_close(bloom));
-			}
+	if (clsm->cursors == NULL)
+		return (0);
+
+	/* Detach from our old primary. */
+	if (clsm->primary_chunk != NULL) {
+		WT_ATOMIC_SUB(clsm->primary_chunk->ncursor, 1);
+		clsm->primary_chunk = NULL;
+	}
+
+	FORALL_CURSORS(clsm, c, i) {
+		clsm->cursors[i] = NULL;
+		WT_RET(c->close(c));
+		if ((bloom = clsm->blooms[i]) != NULL) {
+			clsm->blooms[i] = NULL;
+			WT_RET(__wt_bloom_close(bloom));
 		}
 	}
 
@@ -116,7 +123,6 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm)
 	WT_LSM_TREE *lsm_tree;
 	WT_SESSION_IMPL *session;
 	const char *ckpt_cfg[] = { "checkpoint=WiredTigerCheckpoint", NULL };
-	const char *primary_uri;
 	int i, nchunks;
 
 	session = (WT_SESSION_IMPL *)clsm->iface.session;
@@ -131,31 +137,9 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm)
 		F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
 	}
 
-	/*
-	 * Take a copy of the primary cursor URI. We're about to close the
-	 * cursor, but this pointer will stay valid because it is owned by the
-	 * underlying btree handle.
-	 */
-	if (clsm->cursors != NULL && clsm->cursors[clsm->nchunks - 1] != NULL)
-		primary_uri = clsm->cursors[clsm->nchunks - 1]->uri;
-	else
-		primary_uri = NULL;
 	WT_RET(__clsm_close_cursors(clsm));
 
 	__wt_spin_lock(session, &lsm_tree->lock);
-	/* Detach from our old primary. */
-	if (primary_uri != NULL) {
-		for (i = lsm_tree->nchunks - 1; i >= 0; i--) {
-			chunk = &lsm_tree->chunk[i];
-			if (strcmp(primary_uri, chunk->uri) == 0) {
-				--chunk->ncursor;
-				break;
-			}
-		}
-		/* We must find the primary: it can't have gone anywhere yet. */
-		WT_ASSERT(session, i != -1);
-	}
-
 	/* Merge cursors have already figured out how many chunks they need. */
 	if (F_ISSET(clsm, WT_CLSM_MERGE))
 		nchunks = clsm->nchunks;
@@ -175,7 +159,7 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm)
 		 * Read from the checkpoint if the file has been written.
 		 * Once all cursors switch, the in-memory tree can be evicted.
 		 */
-		chunk = &lsm_tree->chunk[i];
+		chunk = lsm_tree->chunk[i];
 		WT_ERR(__wt_curfile_open(session, chunk->uri, &clsm->iface,
 		    F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ?
 		    ckpt_cfg : NULL, cp));
@@ -192,7 +176,9 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm)
 	WT_ASSERT(session,
 	    !F_ISSET(clsm, WT_CLSM_UPDATED) ||
 	    !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK));
-	++chunk->ncursor;
+
+	clsm->primary_chunk = chunk;
+	WT_ATOMIC_ADD(clsm->primary_chunk->ncursor, 1);
 
 	/* Peek into the btree layer to track the in-memory size. */
 	if (lsm_tree->memsizep == NULL)
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 6c35b92247d..c9973ca7805 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -8,6 +8,53 @@
 #include "wt_internal.h"
 
 /*
+ * __wt_merge_update_tree --
+ *	Merge a set of chunks and create a new one.
+ *	Must be called with the LSM lock held.
+ */
+static int
+__lsm_merge_update_tree(WT_SESSION_IMPL *session,
+    WT_LSM_TREE *lsm_tree, int nchunks, WT_LSM_CHUNK **chunkp)
+{
+	WT_LSM_CHUNK *chunk;
+	size_t chunk_sz;
+	int i, j;
+
+	if (nchunks > lsm_tree->old_avail) {
+		chunk_sz = sizeof(*lsm_tree->old_chunks);
+		WT_RET(__wt_realloc(session,
+		    &lsm_tree->old_alloc,
+		    chunk_sz * WT_MAX(10, lsm_tree->nold_chunks + 2 * nchunks),
+		    &lsm_tree->old_chunks));
+		lsm_tree->old_avail += (int)(lsm_tree->old_alloc / chunk_sz) -
+		    lsm_tree->nold_chunks;
+		lsm_tree->nold_chunks = (int)(lsm_tree->old_alloc / chunk_sz);
+	}
+	/* Copy entries one at a time, so we can reuse gaps in the list. */
+	for (i = j = 0; j < nchunks && i < lsm_tree->nold_chunks; i++) {
+		if (lsm_tree->old_chunks[i] == NULL) {
+			lsm_tree->old_chunks[i] = lsm_tree->chunk[j++];
+			--lsm_tree->old_avail;
+		}
+	}
+
+	WT_ASSERT(session, j == nchunks);
+
+	/* Update the current chunk list. */
+	memmove(lsm_tree->chunk + 1, lsm_tree->chunk + nchunks,
+	    (lsm_tree->nchunks - nchunks) * sizeof(*lsm_tree->chunk));
+	lsm_tree->nchunks -= nchunks - 1;
+	memset(lsm_tree->chunk + lsm_tree->nchunks, 0,
+	    (nchunks - 1) * sizeof(*lsm_tree->chunk));
+	WT_RET(__wt_calloc_def(session, 1, &chunk));
+	lsm_tree->chunk[0] = chunk;
+	lsm_tree->dsk_gen++;
+
+	*chunkp = chunk;
+	return (0);
+}
+
+/*
  * __wt_lsm_major_merge --
  *	Merge a set of chunks of an LSM tree including the oldest.
  */
@@ -23,8 +70,7 @@ __wt_lsm_major_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 	WT_SESSION *wt_session;
 	const char *dest_uri;
 	uint64_t record_count;
-	size_t chunk_sz;
-	int dest_id, i, j, nchunks;
+	int dest_id, nchunks;
 
 	src = dest = NULL;
 
@@ -55,8 +101,8 @@ __wt_lsm_major_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 	 */
 	nchunks = WT_MIN((int)S2C(session)->hazard_size / 2, nchunks);
 	while (nchunks > 1 &&
-	    (!F_ISSET(&lsm_tree->chunk[nchunks - 1], WT_LSM_CHUNK_ONDISK) ||
-	    lsm_tree->chunk[nchunks - 1].ncursor > 0))
+	    (!F_ISSET(lsm_tree->chunk[nchunks - 1], WT_LSM_CHUNK_ONDISK) ||
+	    lsm_tree->chunk[nchunks - 1]->ncursor > 0))
 		--nchunks;
 
 	if (nchunks <= 1)
@@ -112,40 +158,15 @@ __wt_lsm_major_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 	WT_ERR(ret);
 
 	__wt_spin_lock(session, &lsm_tree->lock);
-	if (nchunks > lsm_tree->old_avail) {
-		chunk_sz = sizeof(*lsm_tree->old_chunks);
-		WT_ERR(__wt_realloc(session,
-		    &lsm_tree->old_alloc,
-		    chunk_sz * WT_MAX(10, lsm_tree->nold_chunks + 2 * nchunks),
-		    &lsm_tree->old_chunks));
-		lsm_tree->old_avail += (int)(lsm_tree->old_alloc / chunk_sz) -
-		    lsm_tree->nold_chunks;
-		lsm_tree->nold_chunks = (int)(lsm_tree->old_alloc / chunk_sz);
-	}
-	/* Copy entries one at a time, so we can reuse gaps in the list. */
-	for (i = j = 0; j < nchunks && i < lsm_tree->nold_chunks; i++) {
-		if (lsm_tree->old_chunks[i].uri == NULL) {
-			lsm_tree->old_chunks[i] = lsm_tree->chunk[j++];
-			--lsm_tree->old_avail;
-		}
-	}
+	ret = __lsm_merge_update_tree(session, lsm_tree, nchunks, &chunk);
 
-	WT_ASSERT(session, j == nchunks);
-
-	/* Update the current chunk list. */
-	memmove(lsm_tree->chunk + 1, lsm_tree->chunk + nchunks,
-	    (lsm_tree->nchunks - nchunks) * sizeof(*lsm_tree->chunk));
-	lsm_tree->nchunks -= nchunks - 1;
-	chunk = &lsm_tree->chunk[0];
-	WT_CLEAR(*chunk);
 	chunk->uri = dest_uri;
 	dest_uri = NULL;
-	chunk->bloom_uri = __wt_buf_steal(session, bbuf, NULL);
+	chunk->bloom_uri = __wt_buf_steal(session, bbuf, 0);
 	chunk->count = record_count;
 	F_SET(chunk, WT_LSM_CHUNK_ONDISK);
 
 	ret = __wt_lsm_meta_write(session, lsm_tree);
-	lsm_tree->dsk_gen++;
 	__wt_spin_unlock(session, &lsm_tree->lock);
 
 	printf("Merge done\n");
diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c
index 2d4858046da..4259b6644f3 100644
--- a/src/lsm/lsm_meta.c
+++ b/src/lsm/lsm_meta.c
@@ -50,7 +50,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 			WT_ERR(__wt_config_subinit(session, &lparser, &cv));
 			chunk_sz = sizeof(*lsm_tree->chunk);
 			for (nchunks = 0; (ret =
-			    __wt_config_next( &lparser, &lk, &lv)) == 0; ) {
+			    __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
 				if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
 					WT_ERR(__wt_strndup(session,
 					    lv.str, lv.len, &chunk->bloom_uri));
@@ -67,20 +67,19 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 					    WT_MAX(10 * chunk_sz,
 					    2 * lsm_tree->chunk_alloc),
 					    &lsm_tree->chunk));
-				chunk = &lsm_tree->chunk[nchunks];
+				WT_ERR(__wt_calloc_def(session, 1, &chunk));
+				lsm_tree->chunk[nchunks++] = chunk;
 				WT_ERR(__wt_strndup(session,
 				    lk.str, lk.len, &chunk->uri));
 				chunk->flags = WT_LSM_CHUNK_ONDISK;
-				nchunks++;
 			}
 			WT_ERR_NOTFOUND_OK(ret);
 			lsm_tree->nchunks = nchunks;
 		} else if (WT_STRING_MATCH("old_chunks", ck.str, ck.len)) {
 			WT_ERR(__wt_config_subinit(session, &lparser, &cv));
 			chunk_sz = sizeof(*lsm_tree->old_chunks);
-			for (nchunks = 0;
-			    (ret = __wt_config_next( &lparser, &lk, &lv)) == 0;
-			    nchunks++) {
+			for (nchunks = 0; (ret =
+			    __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
 				if ((nchunks + 1) * chunk_sz >
 				    lsm_tree->old_avail * chunk_sz) {
 					alloc = lsm_tree->old_alloc;
@@ -96,7 +95,8 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 					    ((lsm_tree->old_alloc - alloc) /
 					    chunk_sz);
 				}
-				chunk = &lsm_tree->old_chunks[nchunks];
+				WT_ERR(__wt_calloc_def(session, 1, &chunk));
+				lsm_tree->old_chunks[nchunks++] = chunk;
 				WT_ERR(__wt_strndup(session,
 				    lk.str, lk.len, &chunk->uri));
 				chunk->flags = WT_LSM_CHUNK_ONDISK;
@@ -139,7 +139,7 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 	    lsm_tree->bloom_factor, lsm_tree->bloom_k));
 	WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=["));
 	for (i = 0; i < lsm_tree->nchunks; i++) {
-		chunk = &lsm_tree->chunk[i];
+		chunk = lsm_tree->chunk[i];
 		if (i > 0)
 			WT_ERR(__wt_buf_catfmt(session, buf, ","));
 		WT_ERR(__wt_buf_catfmt(session, buf, "\"%s\"", chunk->uri));
@@ -154,8 +154,8 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 	WT_ERR(__wt_buf_catfmt(session, buf, ",old_chunks=["));
 	first = 1;
 	for (i = 0; i < (int)lsm_tree->nold_chunks; i++) {
-		chunk = &lsm_tree->old_chunks[i];
-		if (chunk->uri == NULL)
+		chunk = lsm_tree->old_chunks[i];
+		if (chunk == NULL)
 			continue;
 		if (first)
 			first = 0;
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index cf9f4d81421..19a95656a9d 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -14,6 +14,7 @@
 static void
 __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 {
+	WT_LSM_CHUNK *chunk;
 	int i;
 
 	TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q);
@@ -21,8 +22,12 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 
 	__wt_free(session, lsm_tree->name);
 	for (i = 0; i < lsm_tree->nchunks; i++) {
-		__wt_free(session, lsm_tree->chunk[i].bloom_uri);
-		__wt_free(session, lsm_tree->chunk[i].uri);
+		if ((chunk = lsm_tree->chunk[i]) == NULL)
+			continue;
+
+		__wt_free(session, chunk->bloom_uri);
+		__wt_free(session, chunk->uri);
+		__wt_free(session, chunk);
 	}
 	__wt_free(session, lsm_tree->chunk);
 
@@ -288,8 +293,8 @@ __wt_lsm_tree_switch(
 		    2 * lsm_tree->chunk_alloc),
 		    &lsm_tree->chunk));
 
-	chunk = &lsm_tree->chunk[lsm_tree->nchunks++];
-	WT_CLEAR(*chunk);
+	WT_ERR(__wt_calloc_def(session, 1, &chunk));
+	lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
 	WT_ERR(__wt_lsm_tree_create_chunk(session,
 	    lsm_tree, WT_ATOMIC_ADD(lsm_tree->last, 1), &chunk->uri));
 
@@ -320,7 +325,7 @@ __wt_lsm_tree_drop(
 
 	/* Drop the chunks. */
 	for (i = 0; i < lsm_tree->nchunks; i++) {
-		chunk = &lsm_tree->chunk[i];
+		chunk = lsm_tree->chunk[i];
 		WT_ERR(__wt_schema_drop(session, chunk->uri, cfg));
 		if (chunk->bloom_uri != NULL)
 			WT_ERR(
@@ -353,7 +358,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
 
 	WT_RET(__wt_lsm_tree_get(session, uri, &lsm_tree));
 	for (i = 0; i < lsm_tree->nchunks; i++) {
-		chunk = &lsm_tree->chunk[i];
+		chunk = lsm_tree->chunk[i];
 		if (func == __wt_checkpoint &&
 		    F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
 			continue;
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index 78a2dcf9041..b73b96ff2fc 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -19,7 +19,7 @@ void *
 __wt_lsm_worker(void *arg)
 {
 	WT_DECL_RET;
-	WT_LSM_CHUNK *chunk, *chunk_array;
+	WT_LSM_CHUNK *chunk, **chunk_array;
 	WT_LSM_TREE *lsm_tree;
 	WT_SESSION_IMPL *session;
 	const char *cfg[] = { "name=,drop=", NULL };
@@ -46,7 +46,7 @@ __wt_lsm_worker(void *arg)
 		 * to merge operations.
 		 */
 		for (nchunks = lsm_tree->nchunks - 1;
-		    nchunks > 0 && lsm_tree->chunk[nchunks - 1].ncursor > 0;
+		    nchunks > 0 && lsm_tree->chunk[nchunks - 1]->ncursor > 0;
 		    --nchunks)
 			;
 		if (chunk_alloc < lsm_tree->chunk_alloc)
@@ -63,7 +63,8 @@ __wt_lsm_worker(void *arg)
 		 * Write checkpoints in all completed files, then find
 		 * something to merge.
 		 */
-		for (i = 0, chunk = chunk_array; i < nchunks; i++, chunk++) {
+		for (i = 0; i < nchunks; i++) {
+			chunk = chunk_array[i];
 			if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ||
 			    chunk->ncursor > 0)
 				continue;
@@ -78,7 +79,7 @@ __wt_lsm_worker(void *arg)
 			    __wt_checkpoint, cfg, 0));
 			if (ret == 0) {
 				__wt_spin_lock(session, &lsm_tree->lock);
-				F_SET(&lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK);
+				F_SET(lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK);
 				lsm_tree->dsk_gen++;
 				__wt_spin_unlock(session, &lsm_tree->lock);
 				progress = 1;
@@ -111,15 +112,16 @@ static int
 __lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 {
 	WT_DECL_RET;
-	char *uri;
+	WT_LSM_CHUNK *chunk;
+	const char *uri;
 	const char *drop_cfg[] = { NULL };
 	int found, i;
 
 	found = 0;
 	for (i = 0; i < lsm_tree->nold_chunks; i++) {
-		uri = (char *)lsm_tree->old_chunks[i].uri;
-		if (uri == NULL)
+		if ((chunk = lsm_tree->old_chunks[i]) == NULL)
 			continue;
+		uri = chunk->uri;
 		if (!found) {
 			found = 1;
 			/* TODO: Do we need the lsm_tree lock for all drops? */
@@ -137,8 +139,7 @@ __lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 		if (ret != 0)
 			goto err;
 		__wt_free(session, uri);
-		memset(
-		    &lsm_tree->old_chunks[i], 0, sizeof(*lsm_tree->old_chunks));
+		__wt_free(session, lsm_tree->old_chunks[i]);
 		++lsm_tree->old_avail;
 	}
 	if (found) {