diff options
author | Michael Cahill <mjc@wiredtiger.com> | 2012-09-26 00:16:27 -0700 |
---|---|---|
committer | Michael Cahill <mjc@wiredtiger.com> | 2012-09-26 00:16:27 -0700 |
commit | d1a319479414a52a28eb3cfaff86689ee6ff421b (patch) | |
tree | 5cee1aec98e15520128edc57df2359d126807348 /src/include | |
parent | f7af15fc8855db8fffbced9ca904b65f5af4ab38 (diff) | |
parent | 57cf5c5693878b834195c754f1c4081b920dc674 (diff) | |
download | mongo-d1a319479414a52a28eb3cfaff86689ee6ff421b.tar.gz |
Merge pull request #337 from wiredtiger/nocache
Add support for cursors that operate outside of cache and have LSM merges use them.
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/api.h | 5 | ||||
-rw-r--r-- | src/include/btree.h | 16 | ||||
-rw-r--r-- | src/include/btree.i | 45 | ||||
-rw-r--r-- | src/include/cache.i | 10 | ||||
-rw-r--r-- | src/include/cursor.i | 8 | ||||
-rw-r--r-- | src/include/extern.h | 2 | ||||
-rw-r--r-- | src/include/misc.h | 3 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 21 |
8 files changed, 77 insertions, 33 deletions
diff --git a/src/include/api.h b/src/include/api.h index 4c6c50161a9..47129ee8291 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -130,6 +130,7 @@ struct __wt_session_impl { * easily call a function to clear memory up to, but not including, the * hazard reference. */ + uint32_t hazard_size; /* Count of used hazard references */ u_int nhazard; #define WT_SESSION_CLEAR(s) memset(s, 0, WT_PTRDIFF(&(s)->hazard, s)) WT_HAZARD *hazard; /* Hazard reference array */ @@ -213,7 +214,7 @@ struct __wt_connection_impl { /* Locked: library list */ TAILQ_HEAD(__wt_dlh_qh, __wt_dlh) dlhqh; - u_int btqcnt; /* Locked: btree count */ + u_int open_btree_count; /* Locked: open writable btree count */ u_int next_file_id; /* Locked: file ID counter */ /* @@ -235,7 +236,7 @@ struct __wt_connection_impl { * WiredTiger allocates space for a fixed number of hazard references * in each thread of control. */ - uint32_t hazard_size; /* Hazard array size */ + uint32_t hazard_max; /* Hazard array size */ WT_CACHE *cache; /* Page cache */ uint64_t cache_size; diff --git a/src/include/btree.h b/src/include/btree.h index 7211e8b0704..c1cff8f2eca 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -125,18 +125,20 @@ struct __wt_btree { #define WT_BTREE_DISCARD 0x0002 /* Discard on release */ #define WT_BTREE_EXCLUSIVE 0x0004 /* Need exclusive access to handle */ #define WT_BTREE_LOCK_ONLY 0x0008 /* Handle is only needed for locking */ -#define WT_BTREE_NO_EVICTION 0x0010 /* Disable eviction */ -#define WT_BTREE_NO_HAZARD 0x0020 /* Disable hazard references */ -#define WT_BTREE_OPEN 0x0040 /* Handle is open */ -#define WT_BTREE_SALVAGE 0x0080 /* Handle is for salvage */ -#define WT_BTREE_UPGRADE 0x0100 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x0200 /* Handle is for verify */ +#define WT_BTREE_NO_CACHE 0x0010 /* Disable caching */ +#define WT_BTREE_NO_EVICTION 0x0020 /* Disable eviction */ +#define WT_BTREE_NO_HAZARD 0x0040 /* Disable hazard references */ +#define WT_BTREE_OPEN 0x0080 /* Handle is open */ +#define WT_BTREE_SALVAGE 0x0100 /* Handle is for salvage */ +#define WT_BTREE_UPGRADE 0x0200 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x0400 /* Handle is for verify */ uint32_t flags; }; /* Flags that make a btree handle special (not for normal use). */ #define WT_BTREE_SPECIAL_FLAGS \ - (WT_BTREE_BULK | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) + (WT_BTREE_BULK | WT_BTREE_NO_CACHE | \ + WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) /* * WT_SALVAGE_COOKIE -- diff --git a/src/include/btree.i b/src/include/btree.i index 85375fadd89..e5f01bcb2e6 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -295,9 +295,43 @@ __wt_get_addr( static inline void __wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page) { - /* We never acquired a hazard reference on the root page. */ - if (page != NULL && !WT_PAGE_IS_ROOT(page)) - __wt_hazard_clear(session, page); + WT_BTREE *btree; + + btree = session->btree; + + /* + * Fast-track pages we don't have and the root page, which sticks + * in memory, regardless. + */ + if (page == NULL || WT_PAGE_IS_ROOT(page)) + return; + + /* If this is a non cached page, discard it. */ + if (F_ISSET(btree, WT_BTREE_NO_CACHE)) { + page->ref->page = NULL; + page->ref->state = WT_REF_DISK; + __wt_page_out(session, &page, 0); + return; + } + + /* Discard our hazard reference. */ + __wt_hazard_clear(session, page); +} + +/* + * __wt_stack_release -- + * Release references to a page stack. + */ +static inline void +__wt_stack_release(WT_SESSION_IMPL *session, WT_PAGE *page) +{ + WT_PAGE *next; + + while (page != NULL && !WT_PAGE_IS_ROOT(page)) { + next = page->parent; + __wt_page_release(session, page); + page = next; + } } /* @@ -310,7 +344,7 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) WT_CONNECTION_IMPL *conn; WT_HAZARD *hp; WT_SESSION_IMPL *s; - uint32_t i, session_cnt; + uint32_t i, hazard_size, session_cnt; conn = S2C(session); @@ -326,7 +360,8 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { if (!s->active) continue; - for (hp = s->hazard; hp < s->hazard + conn->hazard_size; ++hp) + WT_ORDERED_READ(hazard_size, s->hazard_size); + for (hp = s->hazard; hp < s->hazard + hazard_size; ++hp) if (hp->page == page) return (hp); } diff --git a/src/include/cache.i b/src/include/cache.i index 79b99653a78..ac4d46e00fa 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -57,9 +57,13 @@ __wt_eviction_page_check(WT_SESSION_IMPL *session, WT_PAGE *page) F_ISSET(session->btree, WT_BTREE_NO_EVICTION)) return (0); - /* Check the page's memory footprint. */ - if ((int64_t)page->memory_footprint > conn->cache_size / 2 || - page->memory_footprint > 20 * session->btree->maxleafpage) + /* + * Check the page's memory footprint - evict pages that take up more + * than their fair share of the cache. We define a fair share as + * approximately half the cache size per open writable btree handle. + */ + if ((int64_t)page->memory_footprint > + conn->cache_size / (2 * (conn->open_btree_count + 1))) return (1); /* diff --git a/src/include/cursor.i b/src/include/cursor.i index 47660419c75..e9a3240684c 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -56,11 +56,9 @@ __cursor_leave(WT_CURSOR_BTREE *cbt) cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - /* Optionally release any page references we're holding. */ - if (cbt->page != NULL) { - __wt_page_release(session, cbt->page); - cbt->page = NULL; - } + /* Release any page references we're holding. */ + __wt_stack_release(session, cbt->page); + cbt->page = NULL; /* Reset the returned key/value state. */ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); diff --git a/src/include/extern.h b/src/include/extern.h index 363c4c0a990..b856b754bfb 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -589,6 +589,8 @@ extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, extern int __wt_conn_btree_close(WT_SESSION_IMPL *session, int locked); extern int __wt_conn_btree_close_all(WT_SESSION_IMPL *session, const char *name); +extern int __wt_conn_btree_discard_single(WT_SESSION_IMPL *session, + WT_BTREE *btree); extern int __wt_conn_btree_discard(WT_CONNECTION_IMPL *conn); extern int __wt_connection_init(WT_CONNECTION_IMPL *conn); extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn); diff --git a/src/include/misc.h b/src/include/misc.h index dd34b9aa0dc..6b82155bb08 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -52,6 +52,9 @@ #define WT_SKIP_MAXDEPTH 10 #define WT_SKIP_PROBABILITY (UINT32_MAX >> 2) +/* The number of hazard references that can be in use is grown dynamically. */ +#define WT_HAZARD_INCR 10 + /* * Quiet compiler warnings about unused parameters. */ diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index d9c094f6599..edd24a483ff 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -522,15 +522,14 @@ struct __wt_session { * @config{append, append the value as a new record\, creating a new * record number key; valid only for cursors with record number keys.,a * boolean flag; default \c false.} - * @config{bulk, configure the cursor for bulk loads; bulk-load is a - * fast load path for newly created objects and only newly created - * objects may be bulk-loaded. Cursors configured for bulk load only - * support the WT_CURSOR::insert and WT_CURSOR::close methods.,a boolean - * flag; default \c false.} - * @config{checkpoint, the name of a checkpoint to open; the reserved - * checkpoint name "WiredTigerCheckpoint" opens a cursor on the most - * recent internal checkpoint taken for the object.,a string; default - * empty.} + * @config{bulk, configure the cursor for bulk loads\, a fast load path + * that may only be used for newly created objects. Cursors configured + * for bulk load only support the WT_CURSOR::insert and WT_CURSOR::close + * methods.,a boolean flag; default \c false.} + * @config{checkpoint, the name of a checkpoint to open (the reserved + * name "WiredTigerCheckpoint" opens the most recent internal checkpoint + * taken for the object). The cursor does not support data + * modification.,a string; default empty.} * @config{dump, configure the cursor for dump format inputs and * outputs: "hex" selects a simple hexadecimal format\, "print" selects * a format where only non-printing characters are hexadecimal encoded. @@ -1146,8 +1145,8 @@ struct __wt_connection { * may need quoting\, for example\, * <code>extensions=("/path/to/ext.so"="entry=my_entry")</code>.,a list of * strings; default empty.} - * @config{hazard_max, number of simultaneous hazard references per session - * handle.,an integer greater than or equal to 15; default \c 30.} + * @config{hazard_max, maximum number of simultaneous hazard references per + * session handle.,an integer greater than or equal to 15; default \c 1000.} * @config{logging, enable logging.,a boolean flag; default \c false.} * @config{multiprocess, permit sharing between processes (will automatically * start an RPC server for primary processes and use RPC for secondary |