summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/include/lsm.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/include/lsm.h')
-rw-r--r--src/third_party/wiredtiger/src/include/lsm.h232
1 files changed, 232 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
new file mode 100644
index 00000000000..99532b97850
--- /dev/null
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -0,0 +1,232 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * WT_LSM_WORKER_COOKIE --
+ * State for an LSM worker thread.
+ */
+struct __wt_lsm_worker_cookie {
+ WT_LSM_CHUNK **chunk_array;
+ size_t chunk_alloc;
+ u_int nchunks;
+};
+
+/*
+ * WT_LSM_WORKER_ARGS --
+ * State for an LSM worker thread.
+ */
+struct __wt_lsm_worker_args {
+ WT_SESSION_IMPL *session; /* Session */
+ WT_CONDVAR *work_cond; /* Owned by the manager */
+ wt_thread_t tid; /* Thread id */
+ u_int id; /* My manager slot id */
+ uint32_t type; /* Types of operations handled */
+#define WT_LSM_WORKER_RUN 0x01
+ uint32_t flags; /* Worker flags */
+};
+
+/*
+ * WT_CURSOR_LSM --
+ * An LSM cursor.
+ */
+struct __wt_cursor_lsm {
+ WT_CURSOR iface;
+
+ WT_LSM_TREE *lsm_tree;
+ uint64_t dsk_gen;
+
+ u_int nchunks; /* Number of chunks in the cursor */
+ u_int nupdates; /* Updates needed (including
+ snapshot isolation checks). */
+ WT_BLOOM **blooms; /* Bloom filter handles. */
+ size_t bloom_alloc;
+
+ WT_CURSOR **cursors; /* Cursor handles. */
+ size_t cursor_alloc;
+
+ WT_CURSOR *current; /* The current cursor for iteration */
+ WT_LSM_CHUNK *primary_chunk; /* The current primary chunk */
+
+ uint64_t *switch_txn; /* Switch txn for each chunk */
+ size_t txnid_alloc;
+
+ u_int update_count; /* Updates performed. */
+
+#define WT_CLSM_ACTIVE 0x01 /* Incremented the session count */
+#define WT_CLSM_ITERATE_NEXT 0x02 /* Forward iteration */
+#define WT_CLSM_ITERATE_PREV 0x04 /* Backward iteration */
+#define WT_CLSM_MERGE 0x08 /* Merge cursor, don't update */
+#define WT_CLSM_MINOR_MERGE 0x10 /* Minor merge, include tombstones */
+#define WT_CLSM_MULTIPLE 0x20 /* Multiple cursors have values for the
+ current key */
+#define WT_CLSM_OPEN_READ 0x40 /* Open for reads */
+#define WT_CLSM_OPEN_SNAPSHOT 0x80 /* Open for snapshot isolation */
+ uint32_t flags;
+};
+
+/*
+ * WT_LSM_CHUNK --
+ * A single chunk (file) in an LSM tree.
+ */
+struct __wt_lsm_chunk {
+ const char *uri; /* Data source for this chunk */
+ const char *bloom_uri; /* URI of Bloom filter, if any */
+ struct timespec create_ts; /* Creation time (for rate limiting) */
+ uint64_t count; /* Approximate count of records */
+ uint64_t size; /* Final chunk size */
+
+ uint64_t switch_txn; /*
+ * Largest transaction that can write
+ * to this chunk, set by a worker
+ * thread when the chunk is switched
+ * out, or by compact to get the most
+ * recent chunk flushed.
+ */
+
+ uint32_t id; /* ID used to generate URIs */
+ uint32_t generation; /* Merge generation */
+ uint32_t refcnt; /* Number of worker thread references */
+ uint32_t bloom_busy; /* Number of worker thread references */
+
+ int8_t empty; /* 1/0: checkpoint missing */
+ int8_t evicted; /* 1/0: in-memory chunk was evicted */
+
+#define WT_LSM_CHUNK_BLOOM 0x01
+#define WT_LSM_CHUNK_MERGING 0x02
+#define WT_LSM_CHUNK_ONDISK 0x04
+#define WT_LSM_CHUNK_STABLE 0x08
+ uint32_t flags;
+} WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+
+/*
+ * Different types of work units. Used by LSM worker threads to choose which
+ * type of work they will execute, and by work units to define which action
+ * is required.
+ */
+#define WT_LSM_WORK_BLOOM 0x01 /* Create a bloom filter */
+#define WT_LSM_WORK_DROP 0x02 /* Drop unused chunks */
+#define WT_LSM_WORK_FLUSH 0x04 /* Flush a chunk to disk */
+#define WT_LSM_WORK_MERGE 0x08 /* Look for a tree merge */
+#define WT_LSM_WORK_SWITCH 0x10 /* Switch to new in-memory chunk */
+
+/*
+ * WT_LSM_WORK_UNIT --
+ * A definition of maintenance that an LSM tree needs done.
+ */
+struct __wt_lsm_work_unit {
+ TAILQ_ENTRY(__wt_lsm_work_unit) q; /* Worker unit queue */
+ uint32_t type; /* Type of operation */
+#define WT_LSM_WORK_FORCE 0x0001 /* Force operation */
+ uint32_t flags; /* Flags for operation */
+ WT_LSM_TREE *lsm_tree;
+};
+
+/*
+ * WT_LSM_MANAGER --
+ * A structure that holds resources used to manage any LSM trees in a
+ * database.
+ */
+struct __wt_lsm_manager {
+ /*
+ * Queues of work units for LSM worker threads. We maintain three
+ * queues, to allow us to keep each queue FIFO, rather than needing
+ * to manage the order of work by shuffling the queue order.
+ * One queue for switches - since switches should never wait for other
+ * work to be done.
+ * One queue for application requested work. For example flushing
+ * and creating bloom filters.
+ * One queue that is for longer running operations such as merges.
+ */
+ TAILQ_HEAD(__wt_lsm_work_switch_qh, __wt_lsm_work_unit) switchqh;
+ TAILQ_HEAD(__wt_lsm_work_app_qh, __wt_lsm_work_unit) appqh;
+ TAILQ_HEAD(__wt_lsm_work_manager_qh, __wt_lsm_work_unit) managerqh;
+ WT_SPINLOCK switch_lock; /* Lock for switch queue */
+ WT_SPINLOCK app_lock; /* Lock for application queue */
+ WT_SPINLOCK manager_lock; /* Lock for manager queue */
+ WT_CONDVAR *work_cond; /* Used to notify worker of activity */
+ uint32_t lsm_workers; /* Current number of LSM workers */
+ uint32_t lsm_workers_max;
+#define WT_LSM_MAX_WORKERS 20
+ WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS];
+};
+
+/*
+ * WT_LSM_TREE --
+ * An LSM tree.
+ */
+struct __wt_lsm_tree {
+ const char *name, *config, *filename;
+ const char *key_format, *value_format;
+ const char *bloom_config, *file_config;
+
+ WT_COLLATOR *collator;
+ const char *collator_name;
+
+ int refcnt; /* Number of users of the tree */
+#define LSM_TREE_MAX_QUEUE 100
+ int queue_ref;
+ WT_RWLOCK *rwlock;
+ TAILQ_ENTRY(__wt_lsm_tree) q;
+
+ WT_DSRC_STATS stats; /* LSM-level statistics */
+
+ uint64_t dsk_gen;
+
+ long ckpt_throttle; /* Rate limiting due to checkpoints */
+ long merge_throttle; /* Rate limiting due to merges */
+ uint64_t chunk_fill_ms; /* Estimate of time to fill a chunk */
+ struct timespec last_flush_ts; /* Timestamp last flush finished */
+ struct timespec work_push_ts; /* Timestamp last work unit added */
+ uint64_t merge_progressing; /* Bumped when merges are active */
+ uint32_t merge_syncing; /* Bumped when merges are syncing */
+
+ /* Configuration parameters */
+ uint32_t bloom_bit_count;
+ uint32_t bloom_hash_count;
+ uint64_t chunk_size;
+ uint64_t chunk_max;
+ u_int merge_min, merge_max;
+
+ u_int merge_idle; /* Count of idle merge threads */
+
+#define WT_LSM_BLOOM_MERGED 0x00000001
+#define WT_LSM_BLOOM_OFF 0x00000002
+#define WT_LSM_BLOOM_OLDEST 0x00000004
+ uint32_t bloom; /* Bloom creation policy */
+
+ WT_LSM_CHUNK **chunk; /* Array of active LSM chunks */
+ size_t chunk_alloc; /* Space allocated for chunks */
+ u_int nchunks; /* Number of active chunks */
+ uint32_t last; /* Last allocated ID */
+ int modified; /* Have there been updates? */
+
+ WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */
+ size_t old_alloc; /* Space allocated for old chunks */
+ u_int nold_chunks; /* Number of old chunks */
+ int freeing_old_chunks; /* Whether chunks are being freed */
+ uint32_t merge_aggressiveness; /* Increase amount of work per merge */
+
+#define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */
+#define WT_LSM_TREE_COMPACTING 0x02 /* Tree being compacted */
+#define WT_LSM_TREE_NEED_SWITCH 0x04 /* New chunk needs creating */
+#define WT_LSM_TREE_OPEN 0x08 /* The tree is open */
+#define WT_LSM_TREE_THROTTLE 0x10 /* Throttle updates */
+ uint32_t flags;
+
+#define WT_LSM_TREE_EXCLUSIVE 0x01 /* Tree is opened exclusively */
+ uint8_t flags_atomic;
+};
+
+/*
+ * WT_LSM_DATA_SOURCE --
+ * Implementation of the WT_DATA_SOURCE interface for LSM.
+ */
+struct __wt_lsm_data_source {
+ WT_DATA_SOURCE iface;
+
+ WT_RWLOCK *rwlock;
+};