summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dist/filelist4
-rw-r--r--dist/s_string.ok5
-rw-r--r--examples/c/ex_all.c22
-rw-r--r--src/btree/bt_handle.c21
-rw-r--r--src/conn/conn_api.c12
-rw-r--r--src/conn/conn_handle.c2
-rw-r--r--src/include/api.h6
-rw-r--r--src/include/extern.h21
-rw-r--r--src/include/lsm.h57
-rw-r--r--src/include/packing.i10
-rw-r--r--src/include/wiredtiger.in11
-rw-r--r--src/include/wt_internal.h7
-rw-r--r--src/lsm/lsm_cursor.c576
-rw-r--r--src/lsm/lsm_dsrc.c154
-rw-r--r--src/lsm/lsm_tree.c189
-rw-r--r--src/lsm/lsm_worker.c34
-rw-r--r--src/schema/schema_drop.c2
-rw-r--r--src/schema/schema_rename.c2
-rw-r--r--src/schema/schema_truncate.c2
-rw-r--r--src/schema/schema_util.c28
-rw-r--r--src/session/session_api.c7
-rw-r--r--src/support/scratch.c2
-rw-r--r--test/suite/test_cursor01.py7
23 files changed, 1134 insertions, 47 deletions
diff --git a/dist/filelist b/dist/filelist
index d732f3c2ec6..8b5a754aa60 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -65,6 +65,10 @@ src/cursor/cur_std.c
src/cursor/cur_table.c
src/log/log.c
src/log/log_desc.c
+src/lsm/lsm_cursor.c
+src/lsm/lsm_dsrc.c
+src/lsm/lsm_tree.c
+src/lsm/lsm_worker.c
src/meta/meta_api.c
src/meta/meta_apply.c
src/meta/meta_ckpt.c
diff --git a/dist/s_string.ok b/dist/s_string.ok
index c037ba51ec7..ecd7e8362dc 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -92,8 +92,8 @@ Kounavis
LF
LRU
LSB
+LSM
LSN
-LSN's
LSNs
LeafGreen
Llqr
@@ -247,6 +247,7 @@ ckptfrag
ckptlist
cksum
clr
+clsm
cmp
cnt
colcheck
@@ -426,7 +427,7 @@ logput
lookup
lookups
lru
-lsn
+lsm
lu
majorp
malloc
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index 9b883a66879..ce19525775e 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -552,13 +552,13 @@ my_create(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
/*! [WT_DATA_SOURCE drop] */
static int
my_drop(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *name, const char *config)
+ const char *name, const char *cfg[])
{
/* Unused parameters */
(void)dsrc;
(void)session;
(void)name;
- (void)config;
+ (void)cfg;
return (0);
}
@@ -567,16 +567,14 @@ my_drop(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
/*! [WT_DATA_SOURCE open_cursor] */
static int
my_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *obj, WT_CURSOR *old_cursor, const char *config,
- WT_CURSOR **new_cursor)
+ const char *obj, const char *cfg[], WT_CURSOR **new_cursor)
{
/* Unused parameters */
(void)dsrc;
(void)session;
(void)obj;
- (void)old_cursor;
- (void)config;
+ (void)cfg;
(void)new_cursor;
return (0);
@@ -586,14 +584,14 @@ my_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
/*! [WT_DATA_SOURCE rename] */
static int
my_rename(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *oldname, const char *newname, const char *config)
+ const char *oldname, const char *newname, const char *cfg[])
{
/* Unused parameters */
(void)dsrc;
(void)session;
(void)oldname;
(void)newname;
- (void)config;
+ (void)cfg;
return (0);
}
@@ -602,13 +600,13 @@ my_rename(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
/*! [WT_DATA_SOURCE sync] */
static int
my_sync(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *name, const char *config)
+ const char *name, const char *cfg[])
{
/* Unused parameters */
(void)dsrc;
(void)session;
(void)name;
- (void)config;
+ (void)cfg;
return (0);
}
@@ -617,13 +615,13 @@ my_sync(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
/*! [WT_DATA_SOURCE truncate] */
static int
my_truncate(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *name, const char *config)
+ const char *name, const char *cfg[])
{
/* Unused parameters */
(void)dsrc;
(void)session;
(void)name;
- (void)config;
+ (void)cfg;
return (0);
}
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 5459a7f005c..88470b76e30 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -383,6 +383,27 @@ __wt_btree_root_empty(WT_SESSION_IMPL *session, WT_PAGE **leafp)
}
/*
+ * __wt_btree_get_memsize --
+ *	Hand back a pointer to the memory footprint counter of the only child
+ * page of the session's current btree.  Fails with WT_ERROR unless the root
+ * page has exactly one entry.
+ */
+int
+__wt_btree_get_memsize(WT_SESSION_IMPL *session, uint32_t **memsizep)
+{
+	WT_PAGE *leaf, *root_page;
+
+	root_page = session->btree->root_page;
+	if (root_page->entries == 1) {
+		/* The single root entry references the page we report on. */
+		leaf = root_page->u.intl.t->page;
+		*memsizep = &leaf->memory_footprint;
+		return (0);
+	}
+
+	return (WT_ERROR);
+}
+
+/*
* __btree_get_last_recno --
* Set the last record number for a column-store.
*/
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index f69f0771b81..ada076fc19c 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -319,6 +319,12 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config)
if (!F_ISSET(s, WT_SESSION_INTERNAL))
__wt_free(session, s->hazard);
+ /*
+ * XXX LSM cleanup.
+ * This is structured so that it could be moved to an extension.
+ */
+ WT_ERR(__wt_lsm_cleanup(&conn->iface));
+
/* Close open btree handles. */
WT_TRET(__wt_conn_btree_discard(conn));
@@ -929,6 +935,12 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
/* If there's a hot-backup file, load it. */
WT_ERR(__wt_metadata_load_backup(session));
+ /*
+ * XXX LSM initialization.
+ * This is structured so that it could be moved to an extension.
+ */
+ WT_ERR(__wt_lsm_init(&conn->iface, NULL));
+
STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0);
*wt_connp = &conn->iface;
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index 0434feae0d1..4c44d6fe9cf 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -25,6 +25,8 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
TAILQ_INIT(&conn->collqh); /* Collator list */
TAILQ_INIT(&conn->compqh); /* Compressor list */
+ TAILQ_INIT(&conn->lsmqh); /* WT_LSM_TREE list */
+
/* Statistics. */
WT_RET(__wt_stat_alloc_connection_stats(session, &conn->stats));
diff --git a/src/include/api.h b/src/include/api.h
index 984614cc183..7d080925be5 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -202,7 +202,6 @@ struct __wt_connection_impl {
WT_FH *lock_fh; /* Lock file handle */
pthread_t cache_evict_tid; /* Cache eviction server thread ID */
- pthread_t cache_read_tid; /* Cache read server thread ID */
/* Locked: btree list */
TAILQ_HEAD(__wt_btree_qh, __wt_btree) btqh;
@@ -265,6 +264,9 @@ struct __wt_connection_impl {
uint32_t direct_io;
uint32_t verbose;
+ /* XXX LSM stuff. Will move. */
+ TAILQ_HEAD(__wt_lsm_qh, __wt_lsm_tree) lsmqh;
+
uint32_t flags;
};
@@ -319,7 +321,7 @@ struct __wt_connection_impl {
#define CURSOR_API_CALL_NOCONF(cur, s, n, bt) \
(s) = (WT_SESSION_IMPL *)(cur)->session; \
- API_CALL_NOCONF(s, cursor, n, cur, bt); \
+ API_CALL_NOCONF(s, cursor, n, cur, bt)
/*******************************************
* Global variables.
diff --git a/src/include/extern.h b/src/include/extern.h
index 19055a70141..9b4fc8b8f1f 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -287,6 +287,8 @@ extern int __wt_btree_open(WT_SESSION_IMPL *session,
extern int __wt_btree_close(WT_SESSION_IMPL *session);
extern int __wt_btree_tree_open(WT_SESSION_IMPL *session, WT_ITEM *dsk);
extern int __wt_btree_root_empty(WT_SESSION_IMPL *session, WT_PAGE **leafp);
+extern int __wt_btree_get_memsize(WT_SESSION_IMPL *session,
+ uint32_t **memsizep);
extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session,
const char *config);
extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session);
@@ -631,6 +633,23 @@ extern int __wt_log_printf(WT_SESSION_IMPL *session,
2,
3)));
extern WT_LOGREC_DESC __wt_logdesc_debug;
+extern int __wt_clsm_open(WT_SESSION_IMPL *session,
+ const char *uri,
+ const char *cfg[],
+ WT_CURSOR **cursorp);
+extern int __wt_lsm_init(WT_CONNECTION *wt_conn, const char *config);
+extern int __wt_lsm_cleanup(WT_CONNECTION *wt_conn);
+extern int __wt_lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
+extern int __wt_lsm_tree_close_all(WT_SESSION_IMPL *session);
+extern int __wt_lsm_tree_create( WT_SESSION_IMPL *session,
+ const char *uri,
+ const char *config);
+extern int __wt_lsm_tree_get( WT_SESSION_IMPL *session,
+ const char *uri,
+ WT_LSM_TREE **treep);
+extern int __wt_lsm_tree_switch(WT_SESSION_IMPL *session,
+ WT_LSM_TREE *lsm_tree);
+extern void *__wt_lsm_worker(void *arg);
extern int __wt_metadata_get(WT_SESSION *session,
const char *uri,
const char **valuep);
@@ -1061,7 +1080,7 @@ extern int __wt_buf_catfmt(WT_SESSION_IMPL *session,
4)));
extern int
__wt_scr_alloc_func(WT_SESSION_IMPL *session,
- uint32_t size, WT_ITEM **scratchp
+ size_t size, WT_ITEM **scratchp
#ifdef HAVE_DIAGNOSTIC
, const char *file, int line
#endif
diff --git a/src/include/lsm.h b/src/include/lsm.h
new file mode 100644
index 00000000000..20df03e4608
--- /dev/null
+++ b/src/include/lsm.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2008-2012 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+struct __wt_cursor_lsm {
+ WT_CURSOR iface; /* Must stay first: __wt_clsm_open asserts
+ offsetof(iface) == 0 */
+
+ WT_LSM_TREE *lsmtree; /* Tree this cursor was opened on */
+ uint64_t dsk_gen; /* Tree dsk_gen the chunk cursors were
+ opened against; __clsm_enter reopens
+ them when it differs from the tree */
+
+ int nchunks; /* Number of slots in cursors[] */
+ WT_CURSOR **cursors; /* One cursor per chunk, filled in by
+ __clsm_open_cursors */
+ WT_CURSOR *current; /* The current cursor for iteration */
+
+#define WT_CLSM_MULTIPLE 0x01 /* Multiple cursors have values for the
+ current key */
+#define WT_CLSM_ITERATE_NEXT 0x02 /* Forward iteration */
+#define WT_CLSM_ITERATE_PREV 0x04 /* Backward iteration */
+ uint32_t flags;
+};
+
+struct __wt_lsm_tree {
+ const char *name, *filename; /* name doubles as the cursor URI;
+ filename prefixes chunk file names */
+ const char *key_format, *value_format, *file_config;
+ /* Formats are shared with cursors;
+ file_config is used to create chunks */
+
+ WT_COLLATOR *collator; /* NULL: WT_LSM_CMP falls back to
+ __wt_btree_lex_compare */
+
+ WT_RWLOCK *rwlock;
+ TAILQ_ENTRY(__wt_lsm_tree) q; /* Linkage on the connection's lsmqh */
+
+ WT_SPINLOCK lock; /* Held by __clsm_open_cursors while it
+ reads dsk_gen/nchunks and updates the
+ cursor counts */
+ uint64_t dsk_gen, ncursor, old_cursors;
+ uint32_t *memsizep; /* Points at a btree page's
+ memory_footprint; may be NULL */
+
+ uint32_t threshhold; /* __clsm_put switches chunks once
+ *memsizep exceeds this */
+
+ WT_CONNECTION_IMPL *conn; /* Passed to thread_create */
+ pthread_t worker_tid; /* LSM worker thread */
+
+ int nchunks; /* Number of active chunks */
+ const char **chunk; /* Array of chunk URIs */
+ size_t chunk_allocated; /* Space allocated for chunks */
+
+#define WT_LSM_TREE_OPEN 0x01 /* Set while the worker thread needs to
+ be joined on close */
+ uint32_t flags;
+};
+
+struct __wt_lsm_data_source {
+ WT_DATA_SOURCE iface; /* Callback table registered for "lsm:" */
+
+ WT_RWLOCK *rwlock; /* Allocated in __wt_lsm_init */
+
+ TAILQ_HEAD(__trees, __wt_lsm_tree) trees; /* Initialized empty in
+ __wt_lsm_init */
+};
diff --git a/src/include/packing.i b/src/include/packing.i
index 99f097fbcdd..5896ef5ca3f 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -117,6 +117,7 @@ next: if (pack->cur == pack->end)
}
#define WT_PACK_GET(session, pv, ap) do { \
+ WT_ITEM *__item; \
switch (pv.type) { \
case 'x': \
break; \
@@ -126,7 +127,9 @@ next: if (pack->cur == pack->end)
break; \
case 'U': \
case 'u': \
- pv.u.item = *va_arg(ap, WT_ITEM *); \
+ __item = va_arg(ap, WT_ITEM *); \
+ pv.u.item.data = __item->data; \
+ pv.u.item.size = __item->size; \
break; \
case 'b': \
case 'h': \
@@ -390,6 +393,7 @@ __unpack_read(WT_SESSION_IMPL *session,
}
#define WT_UNPACK_PUT(session, pv, ap) do { \
+ WT_ITEM *__item; \
switch (pv.type) { \
case 'x': \
break; \
@@ -399,7 +403,9 @@ __unpack_read(WT_SESSION_IMPL *session,
break; \
case 'U': \
case 'u': \
- *va_arg(ap, WT_ITEM *) = pv.u.item; \
+ __item = va_arg(ap, WT_ITEM *); \
+ __item->data = pv.u.item.data; \
+ __item->size = pv.u.item.size; \
break; \
case 'b': \
*va_arg(ap, int8_t *) = (int8_t)pv.u.i; \
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index d7373ff610c..977a06ecde7 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -1524,36 +1524,35 @@ struct __wt_data_source {
* @snippet ex_all.c WT_DATA_SOURCE drop
*/
int (*drop)(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *name, const char *config);
+ const char *name, const char *cfg[]);
/*! Callback to initialize a cursor.
*
* @snippet ex_all.c WT_DATA_SOURCE open_cursor
*/
int (*open_cursor)(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *obj, WT_CURSOR *old_cursor,
- const char *config, WT_CURSOR **new_cursor);
+ const char *obj, const char *cfg[], WT_CURSOR **new_cursor);
/*! Callback to rename an object.
*
* @snippet ex_all.c WT_DATA_SOURCE rename
*/
int (*rename)(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *oldname, const char *newname, const char *config);
+ const char *oldname, const char *newname, const char *cfg[]);
/*! Callback to sync an object.
*
* @snippet ex_all.c WT_DATA_SOURCE sync
*/
int (*sync)(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *name, const char *config);
+ const char *name, const char *cfg[]);
/*! Callback to truncate an object.
*
* @snippet ex_all.c WT_DATA_SOURCE truncate
*/
int (*truncate)(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *name, const char *config);
+ const char *name, const char *cfg[]);
};
/*!
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index 2debb921bdb..01ecdae0311 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -99,6 +99,8 @@ struct __wt_cursor_dump;
typedef struct __wt_cursor_dump WT_CURSOR_DUMP;
struct __wt_cursor_index;
typedef struct __wt_cursor_index WT_CURSOR_INDEX;
+struct __wt_cursor_lsm;
+ typedef struct __wt_cursor_lsm WT_CURSOR_LSM;
struct __wt_cursor_stat;
typedef struct __wt_cursor_stat WT_CURSOR_STAT;
struct __wt_cursor_table;
@@ -123,6 +125,10 @@ struct __wt_insert;
typedef struct __wt_insert WT_INSERT;
struct __wt_insert_head;
typedef struct __wt_insert_head WT_INSERT_HEAD;
+struct __wt_lsm_data_source;
+ typedef struct __wt_lsm_data_source WT_LSM_DATA_SOURCE;
+struct __wt_lsm_tree;
+ typedef struct __wt_lsm_tree WT_LSM_TREE;
struct __wt_named_collator;
typedef struct __wt_named_collator WT_NAMED_COLLATOR;
struct __wt_named_compressor;
@@ -191,6 +197,7 @@ struct __wt_update;
#include "api.h"
#include "cursor.h"
+#include "lsm.h"
#include "meta.h"
#include "schema.h"
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
new file mode 100644
index 00000000000..71d5fec7703
--- /dev/null
+++ b/src/lsm/lsm_cursor.c
@@ -0,0 +1,576 @@
+/*-
+ * Copyright (c) 2008-2012 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * FORALL_CURSORS --
+ * Walk the chunk cursors from the last slot down to slot zero, skipping
+ * NULL slots.  Expands to a for/if pair, so it must be followed by a single
+ * statement or a braced block, and "continue" applies to the hidden loop.
+ */
+#define FORALL_CURSORS(clsm, c, i) \
+ for (i = clsm->nchunks - 1; i >= 0; i--) \
+ if ((c = clsm->cursors[i]) != NULL)
+
+/*
+ * WT_LSM_CMP --
+ * Compare two keys, storing the comparison result in cmp.  The macro
+ * itself evaluates to an error code: always zero for the built-in
+ * lexicographic compare, otherwise whatever the tree's collator returns.
+ */
+#define WT_LSM_CMP(s, lsmtree, k1, k2, cmp) \
+ (((lsmtree)->collator == NULL) ? \
+ (((cmp) = __wt_btree_lex_compare((k1), (k2))), 0) : \
+ (lsmtree)->collator->compare((lsmtree)->collator, &(s)->iface, \
+ (k1), (k2), &(cmp)))
+
+/*
+ * WT_LSM_CURCMP --
+ * WT_LSM_CMP applied to the keys of two cursors.
+ */
+#define WT_LSM_CURCMP(s, lsmtree, c1, c2, cmp) \
+ WT_LSM_CMP(s, lsmtree, &(c1)->key, &(c2)->key, cmp)
+
+/*
+ * LSM API enter/leave: check that the cursor is in sync with the tree.
+ *
+ * WT_LSM_ENTER sets clsm from cursor, enters the cursor API under the method
+ * name n (previously the name was hard-coded to "next" and n was ignored),
+ * then calls __clsm_enter, folding any error into the caller's "ret".
+ * WT_LSM_END is the matching exit: it calls __clsm_leave and ends the API
+ * call.  Both expand to multiple statements and rely on a local "ret".
+ */
+#define WT_LSM_ENTER(clsm, cursor, session, n) \
+	clsm = (WT_CURSOR_LSM *)cursor; \
+	CURSOR_API_CALL_NOCONF(cursor, session, n, NULL); \
+	WT_TRET(__clsm_enter(clsm))
+
+#define WT_LSM_END(clsm, session) \
+	WT_TRET(__clsm_leave(clsm)); \
+	API_END(session)
+
+static int __clsm_open_cursors(WT_CURSOR_LSM *);
+
+/*
+ * __clsm_enter --
+ *	Called on entry to every LSM cursor method: if the tree's generation
+ * no longer matches the one this cursor last saw, reopen the chunk cursors.
+ */
+static inline int
+__clsm_enter(WT_CURSOR_LSM *clsm)
+{
+	/* TODO: indicate somehow that we are in the tree. */
+	if (clsm->dsk_gen == clsm->lsmtree->dsk_gen)
+		return (0);
+
+	return (__clsm_open_cursors(clsm));
+}
+
+/*
+ * __clsm_leave --
+ * Counterpart of __clsm_enter, called when an LSM cursor method
+ * finishes.  Currently a placeholder that always returns zero.
+ */
+static inline int
+__clsm_leave(WT_CURSOR_LSM *clsm)
+{
+ WT_UNUSED(clsm);
+
+ /* TODO: indicate somehow that we are no longer in the tree. */
+ return (0);
+}
+
+/*
+ * The value __clsm_remove writes in place of a removed record.
+ *
+ * TODO: use something other than an empty value as a tombstone: we need
+ * to support empty values from the application.
+ */
+static WT_ITEM __lsm_tombstone = { "", 0, 0, NULL, 0 };
+
+/*
+ * WT_LSM_NEEDVALUE --
+ *	Require that cursor c has a value set and that the value is not a
+ * tombstone (see __clsm_islive); otherwise jump to the caller's "err" label
+ * with the error from __wt_cursor_kv_not_set.  Uses its own argument
+ * throughout rather than a variable named "cursor" in the calling scope.
+ */
+#define WT_LSM_NEEDVALUE(c) do { \
+	WT_CURSOR_NEEDVALUE(c); \
+	if (!__clsm_islive(&(c)->value)) \
+		WT_ERR(__wt_cursor_kv_not_set((c), 0)); \
+} while (0)
+
+/*
+ * __clsm_islive --
+ *	Return non-zero if the item is a real value, zero if it is empty
+ * (empty values are what the LSM code uses as tombstones).
+ */
+static inline int
+__clsm_islive(WT_ITEM *item)
+{
+	return (item->size == 0 ? 0 : 1);
+}
+
+/*
+ * __clsm_get_current --
+ *	Find the smallest / largest of the cursors and copy its key/value.
+ *
+ * Chunk cursors without a key/value set are ignored.  On success the LSM
+ * cursor's key and value are copied from the chosen chunk cursor,
+ * clsm->current points at it, and WT_CLSM_MULTIPLE records whether another
+ * chunk cursor is positioned on an equal key.  Returns WT_NOTFOUND (and
+ * clears the key/value flags) when no chunk cursor is positioned.
+ */
+static int
+__clsm_get_current(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, int smallest)
+{
+	WT_CURSOR *c, *current;
+	int i;
+	int cmp, multiple;
+
+	/*
+	 * Initialize explicitly: both are assigned inside the loop before
+	 * they are read, but only via the first-candidate branch.
+	 */
+	cmp = 0;
+	multiple = 0;
+
+	current = NULL;
+	FORALL_CURSORS(clsm, c, i) {
+		if (!F_ISSET(c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET))
+			continue;
+		if (current == NULL) {
+			/* The first candidate always wins. */
+			cmp = (smallest ? -1 : 1);
+		} else
+			WT_RET(WT_LSM_CURCMP(session,
+			    clsm->lsmtree, c, current, cmp));
+		if (smallest ? cmp < 0 : cmp > 0) {
+			current = c;
+			multiple = 0;
+		} else if (cmp == 0)
+			multiple = 1;
+	}
+
+	c = &clsm->iface;
+	if ((clsm->current = current) == NULL) {
+		F_CLR(c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+		return (WT_NOTFOUND);
+	}
+
+	if (multiple)
+		F_SET(clsm, WT_CLSM_MULTIPLE);
+	else
+		F_CLR(clsm, WT_CLSM_MULTIPLE);
+
+	WT_RET(current->get_key(current, &c->key));
+	WT_RET(current->get_value(current, &c->value));
+	F_SET(c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+	return (0);
+}
+
+/*
+ * __clsm_next --
+ * WT_CURSOR->next method for the LSM cursor type.
+ *
+ * Merges the chunk cursors: every chunk cursor sitting on the current key
+ * is advanced, then the one with the smallest key becomes the new position.
+ * NOTE(review): the value is not checked with __clsm_islive here, so
+ * removed records look like they are returned to the caller -- confirm.
+ */
+static int
+__clsm_next(WT_CURSOR *cursor)
+{
+ WT_CURSOR_LSM *clsm;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int i;
+ int check, cmp;
+
+ WT_LSM_ENTER(clsm, cursor, session, next);
+
+ /* If we aren't positioned, get started. */
+ if (clsm->current == NULL) {
+ FORALL_CURSORS(clsm, c, i) {
+ WT_ERR(c->reset(c));
+ WT_ERR_NOTFOUND_OK(c->next(c));
+ }
+ } else {
+ /*
+ * If there are multiple cursors on that key, move them
+ * forward.  Only cursors visited after the current one in
+ * FORALL_CURSORS order are compared ("check" is set once the
+ * current cursor has been passed).
+ */
+ if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
+ check = 0;
+ FORALL_CURSORS(clsm, c, i) {
+ if (!F_ISSET(c,
+ WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET))
+ continue;
+ if (check) {
+ WT_ERR(WT_LSM_CURCMP(session,
+ clsm->lsmtree, c, clsm->current,
+ cmp));
+ if (cmp == 0)
+ WT_ERR_NOTFOUND_OK(c->next(c));
+ }
+ if (c == clsm->current)
+ check = 1;
+ }
+ }
+
+ /* Move the smallest cursor forward. */
+ c = clsm->current;
+ WT_ERR_NOTFOUND_OK(c->next(c));
+ }
+
+ /* Find the cursor(s) with the smallest key. */
+ ret = __clsm_get_current(session, clsm, 1);
+err: WT_LSM_END(clsm, session);
+
+ return (ret);
+}
+
+/*
+ * __clsm_prev --
+ *	WT_CURSOR->prev method for the LSM cursor type.
+ *
+ * Mirror image of __clsm_next: every chunk cursor sitting on the current key
+ * is moved backwards, then the one with the largest key becomes the new
+ * position.  The API call is entered under the name "prev".
+ */
+static int
+__clsm_prev(WT_CURSOR *cursor)
+{
+	WT_CURSOR_LSM *clsm;
+	WT_CURSOR *c;
+	WT_DECL_RET;
+	WT_SESSION_IMPL *session;
+	int i;
+	int check, cmp;
+
+	WT_LSM_ENTER(clsm, cursor, session, prev);
+
+	/* If we aren't positioned, get started. */
+	if (clsm->current == NULL) {
+		FORALL_CURSORS(clsm, c, i) {
+			WT_ERR(c->reset(c));
+			WT_ERR_NOTFOUND_OK(c->prev(c));
+		}
+	} else {
+		/*
+		 * If there are multiple cursors on that key, move them
+		 * backwards.
+		 */
+		if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
+			check = 0;
+			FORALL_CURSORS(clsm, c, i) {
+				if (!F_ISSET(c,
+				    WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET))
+					continue;
+				if (check) {
+					WT_ERR(WT_LSM_CURCMP(session,
+					    clsm->lsmtree, c, clsm->current,
+					    cmp));
+					if (cmp == 0)
+						WT_ERR_NOTFOUND_OK(c->prev(c));
+				}
+				if (c == clsm->current)
+					check = 1;
+			}
+		}
+
+		/* Move the largest cursor backwards. */
+		c = clsm->current;
+		WT_ERR_NOTFOUND_OK(c->prev(c));
+	}
+
+	/* Find the cursor(s) with the largest key. */
+	ret = __clsm_get_current(session, clsm, 0);
+err:	WT_LSM_END(clsm, session);
+
+	return (ret);
+}
+
+/*
+ * __clsm_reset --
+ * WT_CURSOR->reset method for the LSM cursor type.
+ *
+ * Resets only the chunk cursor the LSM cursor is currently positioned on,
+ * forgets the position and clears the key/value flags.
+ * NOTE(review): the assignment from c->reset overwrites any error already
+ * recorded in ret by WT_LSM_ENTER -- confirm that is intended.
+ */
+static int
+__clsm_reset(WT_CURSOR *cursor)
+{
+ WT_CURSOR_LSM *clsm;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ WT_LSM_ENTER(clsm, cursor, session, reset);
+ if ((c = clsm->current) != NULL) {
+ ret = c->reset(c);
+ clsm->current = NULL;
+ }
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ WT_LSM_END(clsm, session);
+
+ return (ret);
+}
+
+/*
+ * __clsm_search --
+ * WT_CURSOR->search method for the LSM cursor type.
+ *
+ * Searches the chunk cursors in FORALL_CURSORS order (last slot first) and
+ * stops at the first one that finds the key, copying its key/value into the
+ * LSM cursor.  Returns WT_NOTFOUND if no chunk has the key.
+ * NOTE(review): a match is not checked with __clsm_islive, so a removed
+ * record appears to be reported as found -- confirm.
+ */
+static int
+__clsm_search(WT_CURSOR *cursor)
+{
+ WT_CURSOR *c;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int i;
+
+ WT_LSM_ENTER(clsm, cursor, session, search);
+ WT_CURSOR_NEEDKEY(cursor);
+ /* Default result when there are no chunk cursors to search. */
+ ret = WT_NOTFOUND;
+ FORALL_CURSORS(clsm, c, i) {
+ c->set_key(c, &cursor->key);
+ WT_ERR_NOTFOUND_OK(c->search(c));
+ if (ret == 0) {
+ WT_ERR(c->get_key(c, &cursor->key));
+ WT_ERR(c->get_value(c, &cursor->value));
+ clsm->current = c;
+ F_SET(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ break;
+ }
+ }
+
+err: WT_LSM_END(clsm, session);
+
+ return (ret);
+}
+
+/*
+ * __clsm_search_near --
+ * WT_CURSOR->search_near method for the LSM cursor type.
+ *
+ * Not implemented yet: after checking that a key is set, always returns
+ * ENOTSUP and leaves *exact untouched.
+ */
+static int
+__clsm_search_near(WT_CURSOR *cursor, int *exact)
+{
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ WT_LSM_ENTER(clsm, cursor, session, search_near);
+ WT_CURSOR_NEEDKEY(cursor);
+
+ /*
+ * TODO: implement -- we need the closest key we find, which is going
+ * to require some care during the lookup.
+ */
+ WT_UNUSED(exact);
+ ret = ENOTSUP;
+err: WT_LSM_END(clsm, session);
+
+ return (ret);
+}
+
+/*
+ * __clsm_put --
+ *	Write a key/value pair through the cursor on the last chunk, then
+ * ask the tree to switch chunks if the tracked in-memory size has grown past
+ * the tree's threshold.
+ */
+static inline int
+__clsm_put(
+    WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, WT_ITEM *key, WT_ITEM *value)
+{
+	WT_CURSOR *newest;
+	WT_LSM_TREE *tree;
+	uint32_t *sizep;
+
+	tree = clsm->lsmtree;
+
+	/* All writes go to the cursor in the last slot. */
+	newest = clsm->cursors[clsm->nchunks - 1];
+	newest->set_key(newest, key);
+	newest->set_value(newest, value);
+	WT_RET(newest->insert(newest));
+
+	/* Nothing more to do unless a size is tracked and is over the limit. */
+	sizep = tree->memsizep;
+	if (sizep == NULL || *sizep <= tree->threshhold)
+		return (0);
+
+	return (__wt_lsm_tree_switch(session, tree));
+}
+
+/*
+ * __clsm_insert --
+ *	WT_CURSOR->insert method for the LSM cursor type.
+ *
+ * Requires a key and a live (non-tombstone) value.  Without the overwrite
+ * flag the key is searched for first: an existing key yields
+ * WT_DUPLICATE_KEY and any search error other than WT_NOTFOUND is returned.
+ * Every exit goes through the "err" label so the API call entered by
+ * WT_LSM_ENTER is always ended by WT_LSM_END.
+ */
+static int
+__clsm_insert(WT_CURSOR *cursor)
+{
+	WT_CURSOR_LSM *clsm;
+	WT_DECL_RET;
+	WT_SESSION_IMPL *session;
+
+	WT_LSM_ENTER(clsm, cursor, session, insert);
+	WT_CURSOR_NEEDKEY(cursor);
+	WT_LSM_NEEDVALUE(cursor);
+
+	if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
+	    (ret = __clsm_search(cursor)) != WT_NOTFOUND) {
+		if (ret == 0)
+			ret = WT_DUPLICATE_KEY;
+		/* Don't return directly: the API call must be ended. */
+		goto err;
+	}
+
+	ret = __clsm_put(session, clsm, &cursor->key, &cursor->value);
+	/* An insert leaves the cursor unpositioned. */
+	clsm->current = NULL;
+
+err:	WT_LSM_END(clsm, session);
+
+	return (ret);
+}
+
+/*
+ * __clsm_update --
+ * WT_CURSOR->update method for the LSM cursor type.
+ *
+ * Requires a key and a live value.  With the overwrite flag set the pair is
+ * written unconditionally; otherwise it is written only if a search finds
+ * the key, and the search's error is returned when it does not.
+ */
+static int
+__clsm_update(WT_CURSOR *cursor)
+{
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ WT_LSM_ENTER(clsm, cursor, session, update);
+ WT_CURSOR_NEEDKEY(cursor);
+ WT_LSM_NEEDVALUE(cursor);
+
+ if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) ||
+ (ret = __clsm_search(cursor)) == 0)
+ ret = __clsm_put(session, clsm, &cursor->key, &cursor->value);
+
+err: WT_LSM_END(clsm, session);
+
+ return (ret);
+}
+
+/*
+ * __clsm_remove --
+ *	WT_CURSOR->remove method for the LSM cursor type.
+ *
+ * A remove is implemented as a write of the tombstone value.  With the
+ * overwrite flag set the tombstone is written unconditionally; otherwise it
+ * is written only if a search finds the key, and the search's error is
+ * returned when it does not.  The API call is entered under the name
+ * "remove".
+ */
+static int
+__clsm_remove(WT_CURSOR *cursor)
+{
+	WT_CURSOR_LSM *clsm;
+	WT_DECL_RET;
+	WT_SESSION_IMPL *session;
+
+	WT_LSM_ENTER(clsm, cursor, session, remove);
+	WT_CURSOR_NEEDKEY(cursor);
+
+	if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) ||
+	    (ret = __clsm_search(cursor)) == 0)
+		ret = __clsm_put(session, clsm, &cursor->key, &__lsm_tombstone);
+
+err:	WT_LSM_END(clsm, session);
+
+	return (ret);
+}
+
+/*
+ * __clsm_close --
+ * WT_CURSOR->close method for the LSM cursor type.
+ *
+ * Closes every open chunk cursor, frees the cursor array and then closes
+ * the LSM cursor itself.  Errors are accumulated with WT_TRET so all of the
+ * steps run even if an earlier one fails.
+ * NOTE(review): WT_LSM_ENTER runs __clsm_enter first, which may (re)open
+ * chunk cursors just before they are closed -- confirm that is acceptable.
+ */
+static int
+__clsm_close(WT_CURSOR *cursor)
+{
+ WT_CURSOR_LSM *clsm;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int i;
+
+ WT_LSM_ENTER(clsm, cursor, session, close);
+ FORALL_CURSORS(clsm, c, i)
+ WT_TRET(c->close(c));
+ __wt_free(session, clsm->cursors);
+ /* The WT_LSM_TREE owns the URI. */
+ cursor->uri = NULL;
+ WT_TRET(__wt_cursor_close(cursor));
+ WT_LSM_END(clsm, session);
+
+ return (ret);
+}
+
+/*
+ * __clsm_open_cursors --
+ *	Bring an LSM cursor in sync with its tree: close any chunk cursors it
+ * already holds, then, under the tree's spinlock, record the tree's current
+ * generation, grow the cursor array if the tree has more chunks, and open
+ * one file cursor per chunk.
+ *
+ * Once the spinlock is taken every failure leaves through the "err" label so
+ * the lock is always released.
+ */
+static int
+__clsm_open_cursors(WT_CURSOR_LSM *clsm)
+{
+	WT_CURSOR *c, **cp;
+	WT_DECL_RET;
+	WT_LSM_TREE *lsmtree;
+	WT_SESSION_IMPL *session;
+	int i;
+
+	session = (WT_SESSION_IMPL *)clsm->iface.session;
+	lsmtree = clsm->lsmtree;
+
+	/* Discard the old chunk cursors; the lock isn't held yet. */
+	if (clsm->cursors != NULL) {
+		FORALL_CURSORS(clsm, c, i) {
+			clsm->cursors[i] = NULL;
+			WT_RET(c->close(c));
+		}
+	}
+
+	__wt_spin_lock(session, &lsmtree->lock);
+	clsm->dsk_gen = lsmtree->dsk_gen;
+
+	if (clsm->cursors != NULL) {
+		WT_ASSERT(session, lsmtree->old_cursors > 0);
+		--lsmtree->old_cursors;
+	}
+	++lsmtree->ncursor;
+
+	/* Must not return directly from here on: the lock is held. */
+	if (lsmtree->nchunks > clsm->nchunks)
+		WT_ERR(__wt_realloc(session, NULL,
+		    lsmtree->nchunks * sizeof(WT_CURSOR *),
+		    &clsm->cursors));
+	clsm->nchunks = lsmtree->nchunks;
+
+	for (i = 0, cp = clsm->cursors; i != clsm->nchunks; i++, cp++) {
+		WT_ERR(__wt_curfile_open(session,
+		    lsmtree->chunk[i], &clsm->iface, NULL, cp));
+
+		/* Child cursors always use overwrite and raw mode. */
+		F_SET(*cp, WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
+
+		/* Peek into the btree layer to track the in-memory size. */
+		if (i == 0 && lsmtree->memsizep == NULL)
+			WT_ERR(__wt_btree_get_memsize(
+			    session, &lsmtree->memsizep));
+	}
+
+err:	__wt_spin_unlock(session, &lsmtree->lock);
+	return (ret);
+}
+
+/*
+ * __wt_clsm_open --
+ *	WT_SESSION->open_cursor method for LSM cursors.
+ *
+ * Looks up the LSM tree named by an "lsm:" URI, allocates a WT_CURSOR_LSM
+ * wired to the __clsm_* methods and initializes it.  Returns EINVAL for any
+ * other URI prefix.  The chunk cursors are not opened here: dsk_gen starts at
+ * zero so the first operation on the cursor opens them.
+ */
+int
+__wt_clsm_open(WT_SESSION_IMPL *session,
+    const char *uri, const char *cfg[], WT_CURSOR **cursorp)
+{
+	static WT_CURSOR iface = {
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		__clsm_next,
+		__clsm_prev,
+		__clsm_reset,
+		__clsm_search,
+		__clsm_search_near,
+		__clsm_insert,
+		__clsm_update,
+		__clsm_remove,
+		__clsm_close,
+		{ NULL, NULL },		/* TAILQ_ENTRY q */
+		0,			/* recno key */
+		{ 0 },			/* raw recno buffer */
+		{ NULL, 0, 0, NULL, 0 },/* WT_ITEM key */
+		{ NULL, 0, 0, NULL, 0 },/* WT_ITEM value */
+		0,			/* int saved_err */
+		0			/* uint32_t flags */
+	};
+	WT_CONFIG_ITEM cval;
+	WT_CURSOR *cursor;
+	WT_CURSOR_LSM *clsm;
+	WT_DECL_RET;
+	WT_LSM_TREE *lsmtree;
+
+	clsm = NULL;
+
+	if (!WT_PREFIX_MATCH(uri, "lsm:"))
+		return (EINVAL);
+
+	/* Get the LSM tree. */
+	WT_RET(__wt_lsm_tree_get(session, uri, &lsmtree));
+
+	WT_RET(__wt_calloc_def(session, 1, &clsm));
+
+	cursor = &clsm->iface;
+	*cursor = iface;
+	cursor->session = &session->iface;
+	cursor->uri = lsmtree->name;
+	cursor->key_format = lsmtree->key_format;
+	cursor->value_format = lsmtree->value_format;
+
+	clsm->lsmtree = lsmtree;
+
+	/*
+	 * The tree's dsk_gen starts at one, so starting the cursor on zero
+	 * will force a call into open_cursors on the first operation.
+	 */
+	clsm->dsk_gen = 0;
+
+	STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0);
+	WT_ERR(__wt_cursor_init(cursor, cursor->uri, 0, cfg, cursorp));
+
+	/*
+	 * LSM cursors default to overwrite: if no setting was supplied, turn
+	 * it on.  "No setting" means either no configuration string at all
+	 * (which must not be handed to the parser) or one without an
+	 * "overwrite" key.
+	 */
+	if (cfg[1] == NULL || __wt_config_getones(
+	    session, cfg[1], "overwrite", &cval) == WT_NOTFOUND)
+		F_SET(cursor, WT_CURSTD_OVERWRITE);
+
+	if (0) {
+err:		(void)__clsm_close(cursor);
+	}
+
+	return (ret);
+}
diff --git a/src/lsm/lsm_dsrc.c b/src/lsm/lsm_dsrc.c
new file mode 100644
index 00000000000..e712779a0ac
--- /dev/null
+++ b/src/lsm/lsm_dsrc.c
@@ -0,0 +1,154 @@
+/*-
+ * Copyright (c) 2008-2012 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __lsm_create --
+ *	WT_DATA_SOURCE create callback for "lsm:" objects; forwards to
+ * __wt_lsm_tree_create.
+ */
+static int
+__lsm_create(WT_DATA_SOURCE *dsrc, WT_SESSION *wt_session,
+    const char *uri, const char *config)
+{
+	WT_UNUSED(dsrc);
+
+	return (__wt_lsm_tree_create(
+	    (WT_SESSION_IMPL *)wt_session, uri, config));
+}
+
+/*
+ * __lsm_drop --
+ *	WT_DATA_SOURCE drop callback for "lsm:" objects.  Not implemented:
+ * always returns ENOTSUP.
+ */
+static int
+__lsm_drop(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
+    const char *name, const char *cfg[])
+{
+	/* No argument is used yet. */
+	WT_UNUSED(cfg);
+	WT_UNUSED(name);
+	WT_UNUSED(session);
+	WT_UNUSED(dsrc);
+
+	return (ENOTSUP);
+}
+
+/*
+ * __lsm_open_cursor --
+ *	WT_DATA_SOURCE open_cursor callback for "lsm:" objects; forwards to
+ * __wt_clsm_open.
+ */
+static int
+__lsm_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *wt_session,
+    const char *obj, const char *cfg[], WT_CURSOR **new_cursor)
+{
+	WT_UNUSED(dsrc);
+
+	return (__wt_clsm_open(
+	    (WT_SESSION_IMPL *)wt_session, obj, cfg, new_cursor));
+}
+
+/*
+ * __lsm_rename --
+ *	WT_DATA_SOURCE rename callback for "lsm:" objects.  Not implemented:
+ * always returns ENOTSUP.
+ */
+static int
+__lsm_rename(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
+    const char *oldname, const char *newname, const char *cfg[])
+{
+	/* No argument is used yet. */
+	WT_UNUSED(cfg);
+	WT_UNUSED(newname);
+	WT_UNUSED(oldname);
+	WT_UNUSED(session);
+	WT_UNUSED(dsrc);
+
+	return (ENOTSUP);
+}
+
+/*
+ * __lsm_sync --
+ *	WT_DATA_SOURCE sync callback for "lsm:" objects.  Not implemented:
+ * always returns ENOTSUP.
+ */
+static int
+__lsm_sync(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
+    const char *name, const char *cfg[])
+{
+	/* No argument is used yet. */
+	WT_UNUSED(cfg);
+	WT_UNUSED(name);
+	WT_UNUSED(session);
+	WT_UNUSED(dsrc);
+
+	return (ENOTSUP);
+}
+
+/*
+ * __lsm_truncate --
+ *	WT_DATA_SOURCE truncate callback for "lsm:" objects.  Not
+ * implemented: always returns ENOTSUP.
+ */
+static int
+__lsm_truncate(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
+    const char *name, const char *cfg[])
+{
+	/* No argument is used yet. */
+	WT_UNUSED(cfg);
+	WT_UNUSED(name);
+	WT_UNUSED(session);
+	WT_UNUSED(dsrc);
+
+	return (ENOTSUP);
+}
+
+/*
+ * __wt_lsm_init --
+ *	Initialize LSM structures during wiredtiger_open.
+ *
+ * Allocates the LSM data source, fills in its callback table and registers
+ * it with the connection under the "lsm:" prefix.  The data source handle is
+ * a plain local (it used to be a function-level static shared by every
+ * connection) and is freed again if its rwlock cannot be allocated.
+ * NOTE(review): if add_data_source itself fails the handle and its rwlock
+ * are still not released here -- confirm who owns them on that path.
+ */
+int
+__wt_lsm_init(WT_CONNECTION *wt_conn, const char *config)
+{
+	static WT_DATA_SOURCE iface = {
+		__lsm_create,
+		__lsm_drop,
+		__lsm_open_cursor,
+		__lsm_rename,
+		__lsm_sync,
+		__lsm_truncate
+	};
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_LSM_DATA_SOURCE *lsm_dsrc;
+	WT_SESSION_IMPL *session;
+
+	conn = (WT_CONNECTION_IMPL *)wt_conn;
+	session = conn->default_session;
+	lsm_dsrc = NULL;
+
+	WT_RET(__wt_calloc_def(session, 1, &lsm_dsrc));
+
+	lsm_dsrc->iface = iface;
+	TAILQ_INIT(&lsm_dsrc->trees);
+
+	/* Don't leak the handle if the lock can't be allocated. */
+	if ((ret = __wt_rwlock_alloc(
+	    session, "lsm data source", &lsm_dsrc->rwlock)) != 0) {
+		__wt_free(session, lsm_dsrc);
+		return (ret);
+	}
+
+	return (wt_conn->add_data_source(wt_conn,
+	    "lsm:", &lsm_dsrc->iface, config));
+}
+
+/*
+ * __wt_lsm_cleanup --
+ *	Connection-close hook for LSM: closes every LSM tree using the
+ * connection's default session.
+ */
+int
+__wt_lsm_cleanup(WT_CONNECTION *wt_conn)
+{
+	WT_SESSION_IMPL *default_session;
+
+	default_session = ((WT_CONNECTION_IMPL *)wt_conn)->default_session;
+	return (__wt_lsm_tree_close_all(default_session));
+}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
new file mode 100644
index 00000000000..617abc589e4
--- /dev/null
+++ b/src/lsm/lsm_tree.c
@@ -0,0 +1,189 @@
+/*-
+ * Copyright (c) 2008-2012 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __lsm_tree_discard --
+ *	Unlink an LSM tree from the connection's list and release everything
+ *	it owns, including the structure itself.  The caller must not touch
+ *	lsm_tree after this returns.
+ */
+static void
+__lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+	u_int i;
+
+	TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q);
+	__wt_spin_destroy(session, &lsm_tree->lock);
+
+	/* filename is not freed: it is not a separate allocation. */
+	__wt_free(session, lsm_tree->name);
+	__wt_free(session, lsm_tree->key_format);
+	__wt_free(session, lsm_tree->value_format);
+	__wt_free(session, lsm_tree->file_config);
+
+	/*
+	 * A partially constructed tree can have a non-zero chunk count before
+	 * the chunk array has been allocated: don't walk a NULL array.
+	 */
+	if (lsm_tree->chunk != NULL)
+		for (i = 0; i < lsm_tree->nchunks; i++)
+			__wt_free(session, lsm_tree->chunk[i]);
+	__wt_free(session, lsm_tree->chunk);
+
+	__wt_free(session, lsm_tree);
+}
+
+/*
+ * __wt_lsm_tree_close --
+ *	Close a single LSM tree: if it is marked open, clear the open flag and
+ *	join its worker thread, then discard the tree.  Returns the result of
+ *	the join (0 if the tree was not open).
+ */
+int
+__wt_lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+	WT_DECL_RET;
+
+	if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) {
+		/*
+		 * The worker loops while the open flag is set, so clear it
+		 * before waiting for the thread.
+		 */
+		F_CLR(lsm_tree, WT_LSM_TREE_OPEN);
+		ret = __wt_thread_join(lsm_tree->worker_tid);
+	}
+
+	/* The tree is discarded even if the join failed. */
+	__lsm_tree_discard(session, lsm_tree);
+	return (ret);
+}
+
+/*
+ * __wt_lsm_tree_close_all --
+ *	Close every LSM tree on the connection's list.  All trees are
+ *	processed even if closing one of them fails; an error from the
+ *	individual closes is returned.
+ */
+int
+__wt_lsm_tree_close_all(WT_SESSION_IMPL *session)
+{
+	WT_DECL_RET;
+	WT_LSM_TREE *lsm_tree;
+
+	/*
+	 * Closing a tree unlinks it from the list, so keep taking the head
+	 * until the list is empty.
+	 */
+	for (;;) {
+		lsm_tree = TAILQ_FIRST(&S2C(session)->lsmqh);
+		if (lsm_tree == NULL)
+			break;
+		WT_TRET(__wt_lsm_tree_close(session, lsm_tree));
+	}
+
+	return (ret);
+}
+
+/*
+ * __lsm_tree_create_chunk --
+ *	Create chunk number i of an LSM tree: build the chunk's file URI
+ *	("file:<filename>-NNNNNN.lsm", numbered from 1), store it in
+ *	lsm_tree->chunk[i] and create the underlying object with the tree's
+ *	file configuration.
+ *
+ *	The caller must have sized lsm_tree->chunk to hold slot i.  On failure
+ *	the slot is left NULL so no unowned name is left behind.
+ */
+static int
+__lsm_tree_create_chunk(
+    WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int i)
+{
+	WT_DECL_ITEM(buf);
+	WT_DECL_RET;
+
+	WT_RET(__wt_scr_alloc(session, 0, &buf));
+	/* i is unsigned: use the matching conversion specifier. */
+	WT_ERR(__wt_buf_fmt(session, buf, "file:%s-%06u.lsm",
+	    lsm_tree->filename, i + 1));
+	lsm_tree->chunk[i] = __wt_buf_steal(session, buf, NULL);
+
+	/*
+	 * If the create fails, release the name: the caller doesn't count
+	 * this slot as in use, so nothing else would ever free it.
+	 */
+	if ((ret = __wt_schema_create(session,
+	    lsm_tree->chunk[i], lsm_tree->file_config)) != 0)
+		__wt_free(session, lsm_tree->chunk[i]);
+
+	/* TODO: update metadata. */
+
+err:	__wt_scr_free(&buf);
+	return (ret);
+}
+
+/*
+ * __wt_lsm_tree_create --
+ *	Create an LSM tree for the given URI: allocate the tree structure,
+ *	link it onto the connection's list, record the key/value formats from
+ *	the configuration, create the first chunk and start the worker thread.
+ *
+ *	Returns 0 on success.  On any failure after the structure has been
+ *	linked, the tree is discarded before the error is returned.
+ */
+int
+__wt_lsm_tree_create(
+    WT_SESSION_IMPL *session, const char *uri, const char *config)
+{
+	WT_CONFIG_ITEM cval;
+	WT_DECL_ITEM(buf);
+	WT_DECL_RET;
+	WT_LSM_TREE *lsm_tree;
+	const char *cfg[] = API_CONF_DEFAULTS(session, create, config);
+
+	/*
+	 * XXX this call should just insert the metadata: most of this should
+	 * move to __wt_lsm_tree_open.
+	 */
+	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));
+	__wt_spin_init(session, &lsm_tree->lock);
+	TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q);
+
+	/*
+	 * The tree is on the connection's list from here on: every failure
+	 * must go through the error path so a half-built tree (in particular
+	 * one with a NULL name) is never left where lookups can find it.
+	 */
+	WT_ERR(__wt_strdup(session, uri, &lsm_tree->name));
+	/* Point into our own copy, the caller's string may not outlive us. */
+	lsm_tree->filename = lsm_tree->name + strlen("lsm:");
+
+	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
+	WT_ERR(__wt_strndup(session, cval.str, cval.len,
+	    &lsm_tree->key_format));
+	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
+	WT_ERR(__wt_strndup(session, cval.str, cval.len,
+	    &lsm_tree->value_format));
+
+	lsm_tree->dsk_gen = 1;
+
+	/* TODO: make this configurable. */
+	lsm_tree->threshhold = 2 * WT_MEGABYTE;
+
+	/* A NULL configuration must not be handed to a %s conversion. */
+	WT_ERR(__wt_scr_alloc(session, 0, &buf));
+	WT_ERR(__wt_buf_fmt(session, buf,
+	    "%s,key_format=u,value_format=u", config == NULL ? "" : config));
+	lsm_tree->file_config = __wt_buf_steal(session, buf, NULL);
+
+	/*
+	 * Only set the chunk count once the array exists, the error path
+	 * walks the array based on the count.
+	 */
+	WT_ERR(__wt_calloc_def(session, 1, &lsm_tree->chunk));
+	lsm_tree->nchunks = 1;
+	lsm_tree->chunk_allocated = lsm_tree->nchunks * sizeof(const char *);
+
+	WT_ERR(__lsm_tree_create_chunk(session, lsm_tree, 0));
+
+	/*
+	 * XXX This should definitely only happen when opening the tree.
+	 *
+	 * The worker runs for as long as the open flag is set, so the flag
+	 * must be set before the thread starts or the worker may see it
+	 * clear and exit immediately.
+	 */
+	lsm_tree->conn = S2C(session);
+	F_SET(lsm_tree, WT_LSM_TREE_OPEN);
+	if ((ret = __wt_thread_create(
+	    &lsm_tree->worker_tid, __wt_lsm_worker, lsm_tree)) != 0) {
+		F_CLR(lsm_tree, WT_LSM_TREE_OPEN);
+		goto err;
+	}
+
+	if (0) {
+err:		__lsm_tree_discard(session, lsm_tree);
+	}
+	__wt_scr_free(&buf);
+	return (ret);
+}
+
+/*
+ * __wt_lsm_tree_get --
+ *	Look up an LSM tree by URI on the connection's list.  On a match the
+ *	tree is stored in *treep and 0 is returned; otherwise ENOENT is
+ *	returned and *treep is left untouched.
+ */
+int
+__wt_lsm_tree_get(
+    WT_SESSION_IMPL *session, const char *uri, WT_LSM_TREE **treep)
+{
+	WT_LSM_TREE *lsm_tree;
+
+	TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
+		/* Names are compared exactly, including the URI prefix. */
+		if (strcmp(uri, lsm_tree->name) != 0)
+			continue;
+
+		*treep = lsm_tree;
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * __wt_lsm_tree_switch --
+ *	Switch to a new in-memory tree.  With the tree's lock held: adds the
+ *	current cursor count to old_cursors, advances dsk_gen, grows the
+ *	chunk array by one slot and creates the new chunk under the schema
+ *	lock.  The chunk count only advances if the chunk was created.
+ */
+int
+__wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+	WT_DECL_RET;
+
+	__wt_spin_lock(session, &lsm_tree->lock);
+
+	/* These updates are made before, and regardless of, any failure. */
+	lsm_tree->old_cursors += lsm_tree->ncursor;
+	++lsm_tree->dsk_gen;
+
+	/* TODO more sensible realloc */
+	ret = __wt_realloc(session,
+	    &lsm_tree->chunk_allocated,
+	    (lsm_tree->nchunks + 1) * sizeof(*lsm_tree->chunk),
+	    &lsm_tree->chunk);
+	if (ret == 0) {
+		WT_WITH_SCHEMA_LOCK(session, ret =
+		    __lsm_tree_create_chunk(
+		    session, lsm_tree, lsm_tree->nchunks));
+		if (ret == 0)
+			++lsm_tree->nchunks;
+	}
+
+	__wt_spin_unlock(session, &lsm_tree->lock);
+	/* TODO: mark lsm_tree bad on error(?) */
+	return (ret);
+}
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
new file mode 100644
index 00000000000..c3767f4e970
--- /dev/null
+++ b/src/lsm/lsm_worker.c
@@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2008-2012 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_lsm_worker --
+ *	The worker thread for an LSM tree, intended to write in-memory trees
+ *	to disk and merge on-disk trees.  At present it only opens a session
+ *	on the tree's connection and yields until the tree's open flag is
+ *	cleared.  Always returns NULL.
+ */
+void *
+__wt_lsm_worker(void *arg)
+{
+	WT_CONNECTION *wt_conn;
+	WT_LSM_TREE *lsm_tree;
+	WT_SESSION *session;
+
+	lsm_tree = arg;
+	wt_conn = &lsm_tree->conn->iface;
+
+	/* Without a session there is nothing the worker can do. */
+	if (wt_conn->open_session(wt_conn, NULL, NULL, &session) != 0)
+		return (NULL);
+
+	/*
+	 * NOTE(review): the session opened above is never closed in this
+	 * function -- confirm whether connection close reclaims it.
+	 */
+	for (;;) {
+		if (!F_ISSET(lsm_tree, WT_LSM_TREE_OPEN))
+			break;
+		__wt_yield();
+	}
+
+	return (NULL);
+}
diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c
index 4ef8e8d0220..f16b76286e3 100644
--- a/src/schema/schema_drop.c
+++ b/src/schema/schema_drop.c
@@ -250,7 +250,7 @@ __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
else if (WT_PREFIX_MATCH(uri, "table:"))
ret = __drop_table(session, uri, force, cfg);
else if ((ret = __wt_schema_get_source(session, uri, &dsrc)) == 0)
- ret = dsrc->drop(dsrc, &session->iface, uri, cfg[1]);
+ ret = dsrc->drop(dsrc, &session->iface, uri, cfg);
/*
* Map WT_NOTFOUND to ENOENT (or to 0 if "force" is set), based on the
diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c
index 9115e6cdc4b..f052be06fdc 100644
--- a/src/schema/schema_rename.c
+++ b/src/schema/schema_rename.c
@@ -225,7 +225,7 @@ __wt_schema_rename(WT_SESSION_IMPL *session,
ret = __rename_table(session, oldname, newname);
} else if ((ret = __wt_schema_get_source(session, oldname, &dsrc)) == 0)
ret = dsrc->rename(dsrc,
- &session->iface, oldname, newname, cfg[1]);
+ &session->iface, oldname, newname, cfg);
WT_TRET(__wt_meta_track_off(session, ret != 0));
diff --git a/src/schema/schema_truncate.c b/src/schema/schema_truncate.c
index c5492ca69cd..96066be25c7 100644
--- a/src/schema/schema_truncate.c
+++ b/src/schema/schema_truncate.c
@@ -104,7 +104,7 @@ __wt_schema_truncate(
else if (WT_PREFIX_SKIP(tablename, "table:"))
ret = __truncate_table(session, tablename);
else if ((ret = __wt_schema_get_source(session, uri, &dsrc)) == 0)
- ret = dsrc->truncate(dsrc, &session->iface, uri, cfg[1]);
+ ret = dsrc->truncate(dsrc, &session->iface, uri, cfg);
/* If we didn't find a metadata entry, map that error to ENOENT. */
return (ret == WT_NOTFOUND ? ENOENT : ret);
diff --git a/src/schema/schema_util.c b/src/schema/schema_util.c
index 934781c9d7d..8c5b2c74527 100644
--- a/src/schema/schema_util.c
+++ b/src/schema/schema_util.c
@@ -35,27 +35,24 @@ int
__wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri)
{
const char *name, *sep;
+ int skipped;
/*
* Check if name is somewhere in the WiredTiger name space: it would be
- * "bad" if the application truncated the metadata file. We get passed
- * both objects and simple strings, skip any leading URI prefix.
+ * "bad" if the application truncated the metadata file. Skip any
+ * leading URI prefix, check and then skip over a table name.
*/
name = uri;
- if (WT_PREFIX_SKIP(name, "colgroup:") ||
- WT_PREFIX_SKIP(name, "index:")) {
- /* These URIs normally reference a table name. */
- if ((sep = strchr(name, ':')) != NULL)
- name = sep + 1;
- } else if (!WT_PREFIX_SKIP(name, "table:") &&
- !WT_PREFIX_SKIP(name, "file:"))
- return (__wt_bad_object_type(session, uri));
+ for (skipped = 0; skipped < 2; skipped++) {
+ if ((sep = strchr(name, ':')) == NULL)
+ break;
- if (WT_PREFIX_MATCH(name, "WiredTiger"))
- WT_RET_MSG(session, EINVAL,
- "%s: the \"WiredTiger\" name space may not be used by "
- "applications",
- name);
+ name = sep + 1;
+ if (WT_PREFIX_MATCH(name, "WiredTiger"))
+ WT_RET_MSG(session, EINVAL,
+ "%s: the \"WiredTiger\" name space may not be "
+ "used by applications", name);
+ }
/*
* Disallow JSON quoting characters -- the config string parsing code
@@ -67,5 +64,6 @@ __wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri)
"%s: WiredTiger objects should not include grouping "
"characters in their names",
name);
+
return (0);
}
diff --git a/src/session/session_api.c b/src/session/session_api.c
index 01993337e6b..917833116ec 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -155,6 +155,7 @@ static int
__session_open_cursor(WT_SESSION *wt_session,
const char *uri, WT_CURSOR *to_dup, const char *config, WT_CURSOR **cursorp)
{
+ WT_DATA_SOURCE *dsrc;
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -171,6 +172,7 @@ __session_open_cursor(WT_SESSION *wt_session,
if (WT_PREFIX_MATCH(uri, "colgroup:") ||
WT_PREFIX_MATCH(uri, "index:") ||
WT_PREFIX_MATCH(uri, "file:") ||
+ WT_PREFIX_MATCH(uri, "lsm:") ||
WT_PREFIX_MATCH(uri, "table:"))
ret = __wt_cursor_dup(session, to_dup, config, cursorp);
else
@@ -189,8 +191,9 @@ __session_open_cursor(WT_SESSION *wt_session,
ret = __wt_curstat_open(session, uri, cfg, cursorp);
else if (WT_PREFIX_MATCH(uri, "table:"))
ret = __wt_curtable_open(session, uri, cfg, cursorp);
- else
- ret = __wt_bad_object_type(session, uri);
+ else if ((ret = __wt_schema_get_source(session, uri, &dsrc)) == 0)
+ ret = dsrc->open_cursor(dsrc, &session->iface,
+ uri, cfg, cursorp);
err: API_END_NOTFOUND_MAP(session, ret);
}
diff --git a/src/support/scratch.c b/src/support/scratch.c
index 57b779bd5c2..f9cff2d0343 100644
--- a/src/support/scratch.c
+++ b/src/support/scratch.c
@@ -249,7 +249,7 @@ __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...)
*/
int
__wt_scr_alloc_func(WT_SESSION_IMPL *session,
- uint32_t size, WT_ITEM **scratchp
+ size_t size, WT_ITEM **scratchp
#ifdef HAVE_DIAGNOSTIC
, const char *file, int line
#endif
diff --git a/test/suite/test_cursor01.py b/test/suite/test_cursor01.py
index 73a170fa5b3..3c4bbdb16dd 100644
--- a/test/suite/test_cursor01.py
+++ b/test/suite/test_cursor01.py
@@ -43,6 +43,7 @@ class test_cursor01(wttest.WiredTigerTestCase):
('file-col', dict(tablekind='col',uri='file')),
('file-fix', dict(tablekind='fix',uri='file')),
('file-row', dict(tablekind='row',uri='file')),
+ ('lsm-row', dict(tablekind='row',uri='lsm')),
('table-col', dict(tablekind='col',uri='table')),
('table-fix', dict(tablekind='fix',uri='table')),
('table-row', dict(tablekind='row',uri='table'))
@@ -139,9 +140,13 @@ class test_cursor01(wttest.WiredTigerTestCase):
value = cursor.get_value()
self.assertEqual(key, self.genkey(i))
self.assertEqual(value, self.genvalue(i))
- i += 1
dupc = self.session.open_cursor(None, cursor, None)
self.assertTrue(cursor.equals(dupc))
+ key = dupc.get_key()
+ value = dupc.get_value()
+ self.assertEqual(key, self.genkey(i))
+ self.assertEqual(value, self.genvalue(i))
+ i += 1
cursor.close()
cursor = dupc