From 74f428047657abcb392048ccfe40db3592e09940 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 17 Sep 2012 07:53:01 +0000 Subject: Add partial support for no-cache files -- this works with two caveats: first this code maintains a full stack of tree hazard references, and so I removed the test against the maxleafpage size in the forced eviction code to avoid running out of hazard reference sizes; second, each no-cache cursor blocks all other cursor references to the object, which isn't going to be OK for real use. maintain a stack of hazard references --- src/include/btree.h | 16 +++++++++------- src/include/btree.i | 40 +++++++++++++++++++++++++++++++++++++--- src/include/cache.i | 3 +-- src/include/cursor.i | 8 +++----- src/include/wiredtiger.in | 20 +++++++++++--------- 5 files changed, 61 insertions(+), 26 deletions(-) (limited to 'src/include') diff --git a/src/include/btree.h b/src/include/btree.h index 7211e8b0704..c1cff8f2eca 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -125,18 +125,20 @@ struct __wt_btree { #define WT_BTREE_DISCARD 0x0002 /* Discard on release */ #define WT_BTREE_EXCLUSIVE 0x0004 /* Need exclusive access to handle */ #define WT_BTREE_LOCK_ONLY 0x0008 /* Handle is only needed for locking */ -#define WT_BTREE_NO_EVICTION 0x0010 /* Disable eviction */ -#define WT_BTREE_NO_HAZARD 0x0020 /* Disable hazard references */ -#define WT_BTREE_OPEN 0x0040 /* Handle is open */ -#define WT_BTREE_SALVAGE 0x0080 /* Handle is for salvage */ -#define WT_BTREE_UPGRADE 0x0100 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x0200 /* Handle is for verify */ +#define WT_BTREE_NO_CACHE 0x0010 /* Disable caching */ +#define WT_BTREE_NO_EVICTION 0x0020 /* Disable eviction */ +#define WT_BTREE_NO_HAZARD 0x0040 /* Disable hazard references */ +#define WT_BTREE_OPEN 0x0080 /* Handle is open */ +#define WT_BTREE_SALVAGE 0x0100 /* Handle is for salvage */ +#define WT_BTREE_UPGRADE 0x0200 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x0400 /* Handle is for verify */ uint32_t flags; }; /* Flags that make a btree handle special (not for normal use). */ #define WT_BTREE_SPECIAL_FLAGS \ - (WT_BTREE_BULK | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) + (WT_BTREE_BULK | WT_BTREE_NO_CACHE | \ + WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) /* * WT_SALVAGE_COOKIE -- diff --git a/src/include/btree.i b/src/include/btree.i index 85375fadd89..7f778358b1b 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -295,9 +295,43 @@ __wt_get_addr( static inline void __wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page) { - /* We never acquired a hazard reference on the root page. */ - if (page != NULL && !WT_PAGE_IS_ROOT(page)) - __wt_hazard_clear(session, page); + WT_BTREE *btree; + + btree = session->btree; + + /* + * Fast-track pages we don't have and the root page, which sticks + * in memory, regardless. + */ + if (page == NULL || WT_PAGE_IS_ROOT(page)) + return; + + /* If this is an uncached page, discard it. */ + if (F_ISSET(btree, WT_BTREE_NO_CACHE)) { + page->ref->page = NULL; + page->ref->state = WT_REF_DISK; + __wt_page_out(session, &page, 0); + return; + } + + /* Discard our hazard reference. */ + __wt_hazard_clear(session, page); +} + +/* + * __wt_stack_release -- + * Release references to a page stack. + */ +static inline void +__wt_stack_release(WT_SESSION_IMPL *session, WT_PAGE *page) +{ + WT_PAGE *next; + + while (page != NULL && !WT_PAGE_IS_ROOT(page)) { + next = page->parent; + __wt_page_release(session, page); + page = next; + } } /* diff --git a/src/include/cache.i b/src/include/cache.i index 79b99653a78..d06bb556c14 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -58,8 +58,7 @@ __wt_eviction_page_check(WT_SESSION_IMPL *session, WT_PAGE *page) return (0); /* Check the page's memory footprint. */ - if ((int64_t)page->memory_footprint > conn->cache_size / 2 || - page->memory_footprint > 20 * session->btree->maxleafpage) + if ((int64_t)page->memory_footprint > conn->cache_size / 2) return (1); /* diff --git a/src/include/cursor.i b/src/include/cursor.i index 47660419c75..e9a3240684c 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -56,11 +56,9 @@ __cursor_leave(WT_CURSOR_BTREE *cbt) cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - /* Optionally release any page references we're holding. */ - if (cbt->page != NULL) { - __wt_page_release(session, cbt->page); - cbt->page = NULL; - } + /* Release any page references we're holding. */ + __wt_stack_release(session, cbt->page); + cbt->page = NULL; /* Reset the returned key/value state. */ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 367b52a55cd..f7e7aa30760 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -522,15 +522,14 @@ struct __wt_session { * @config{append, append the value as a new record\, creating a new * record number key; valid only for cursors with record number keys.,a * boolean flag; default \c false.} - * @config{bulk, configure the cursor for bulk loads; bulk-load is a - * fast load path for empty objects and only empty objects may be - * bulk-loaded. Cursors configured for bulk load only support the - * WT_CURSOR::insert and WT_CURSOR::close methods.,a boolean flag; - * default \c false.} - * @config{checkpoint, the name of a checkpoint to open; the reserved - * checkpoint name "WiredTigerCheckpoint" opens a cursor on the most - * recent internal checkpoint taken for the object.,a string; default - * empty.} + * @config{bulk, configure the cursor for bulk loads\, a fast load path + * that may only be used for just created objects. Cursors configured + * for bulk load only support the WT_CURSOR::insert and WT_CURSOR::close + * methods.,a boolean flag; default \c false.} + * @config{checkpoint, the name of a checkpoint to open (the reserved + * name "WiredTigerCheckpoint" opens the most recent internal checkpoint + * taken for the object). The cursor does not support data + * modification.,a string; default empty.} * @config{dump, configure the cursor for dump format inputs and * outputs: "hex" selects a simple hexadecimal format\, "print" selects * a format where only non-printing characters are hexadecimal encoded. @@ -542,6 +541,9 @@ struct __wt_session { * configured with next_random only support the WT_CURSOR::next and * WT_CURSOR::close methods. See @ref cursor_random for details.,a * boolean flag; default \c false.} + * @config{no_cache, do not cache pages from the underlying object. The + * cursor does not support data modification.,a boolean flag; default \c + * false.} * @config{overwrite, change the behavior of the cursor's insert method * to overwrite previously existing values.,a boolean flag; default \c * false.} -- cgit v1.2.1 From 9c0e333511c5602d8d7bd6825b7dae189ac8e2e4 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 24 Sep 2012 17:38:58 +1000 Subject: Update handle cache to deal with no-cache cursors and btree handles. Update LSM code to use no-cache handles for merges. --- src/include/extern.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/include') diff --git a/src/include/extern.h b/src/include/extern.h index 363c4c0a990..b856b754bfb 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -589,6 +589,8 @@ extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, extern int __wt_conn_btree_close(WT_SESSION_IMPL *session, int locked); extern int __wt_conn_btree_close_all(WT_SESSION_IMPL *session, const char *name); +extern int __wt_conn_btree_discard_single(WT_SESSION_IMPL *session, + WT_BTREE *btree); extern int __wt_conn_btree_discard(WT_CONNECTION_IMPL *conn); extern int __wt_connection_init(WT_CONNECTION_IMPL *conn); extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn); -- cgit v1.2.1 From f28613804c872557eccdf936ed916260bed62a65 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 25 Sep 2012 12:42:57 +1000 Subject: Make no_cache configuration undocumented. Update calculation for finding maximum cached page size. --- src/include/cache.i | 3 ++- src/include/wiredtiger.in | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'src/include') diff --git a/src/include/cache.i b/src/include/cache.i index d06bb556c14..2813a055414 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -58,7 +58,8 @@ __wt_eviction_page_check(WT_SESSION_IMPL *session, WT_PAGE *page) return (0); /* Check the page's memory footprint. */ - if ((int64_t)page->memory_footprint > conn->cache_size / 2) + if ((int64_t)page->memory_footprint > + conn->cache_size / (2 * (conn->btqcnt + 1))) return (1); /* diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index ab9cd24b10c..1a33ba7c934 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -541,9 +541,6 @@ struct __wt_session { * configured with next_random only support the WT_CURSOR::next and * WT_CURSOR::close methods. See @ref cursor_random for details.,a * boolean flag; default \c false.} - * @config{no_cache, do not cache pages from the underlying object. The - * cursor does not support data modification.,a boolean flag; default \c - * false.} * @config{overwrite, change the behavior of the cursor's insert method * to overwrite previously existing values.,a boolean flag; default \c * false.} -- cgit v1.2.1 From de5376d58326e6cb4de706284edfe2c24629673f Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 25 Sep 2012 17:41:32 +1000 Subject: Update hazard references, so the active array grows as needed. Bump default hazard_max to 1000. --- src/include/api.h | 3 ++- src/include/btree.i | 1 + src/include/misc.h | 3 +++ src/include/wiredtiger.in | 4 ++-- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'src/include') diff --git a/src/include/api.h b/src/include/api.h index 3578e05b073..613fb48293d 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -235,7 +235,8 @@ struct __wt_connection_impl { * WiredTiger allocates space for a fixed number of hazard references * in each thread of control. */ - uint32_t hazard_size; /* Hazard array size */ + uint32_t hazard_size; /* Count of used hazard references */ + uint32_t hazard_max; /* Hazard array size */ WT_CACHE *cache; /* Page cache */ uint64_t cache_size; diff --git a/src/include/btree.i b/src/include/btree.i index 7f778358b1b..028c7f3edd3 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -360,6 +360,7 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { if (!s->active) continue; + WT_ORDERED_READ(session_cnt, conn->hazard_size); for (hp = s->hazard; hp < s->hazard + conn->hazard_size; ++hp) if (hp->page == page) return (hp); diff --git a/src/include/misc.h b/src/include/misc.h index dd34b9aa0dc..6b82155bb08 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -52,6 +52,9 @@ #define WT_SKIP_MAXDEPTH 10 #define WT_SKIP_PROBABILITY (UINT32_MAX >> 2) +/* The number of hazard references that can be in use is grown dynamically. */ +#define WT_HAZARD_INCR 10 + /* * Quiet compiler warnings about unused parameters. */ diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 1a33ba7c934..abedb83e8bd 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1145,8 +1145,8 @@ struct __wt_connection { * may need quoting\, for example\, * extensions=("/path/to/ext.so"="entry=my_entry").,a list of * strings; default empty.} - * @config{hazard_max, number of simultaneous hazard references per session - * handle.,an integer greater than or equal to 15; default \c 30.} + * @config{hazard_max, maximum number of simultaneous hazard references per + * session handle.,an integer greater than or equal to 15; default \c 1000.} * @config{logging, enable logging.,a boolean flag; default \c false.} * @config{multiprocess, permit sharing between processes (will automatically * start an RPC server for primary processes and use RPC for secondary -- cgit v1.2.1 From 249b9bb0cad2c6412a45dbfd4b30b0c2f74989ca Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 25 Sep 2012 17:53:43 +1000 Subject: Line wrapping and spelling fixes. --- src/include/btree.i | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/include') diff --git a/src/include/btree.i b/src/include/btree.i index 028c7f3edd3..b742ebbfc74 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -306,7 +306,7 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page) if (page == NULL || WT_PAGE_IS_ROOT(page)) return; - /* If this is an uncached page, discard it. */ + /* If this is a non cached page, discard it. */ if (F_ISSET(btree, WT_BTREE_NO_CACHE)) { page->ref->page = NULL; page->ref->state = WT_REF_DISK; -- cgit v1.2.1 From 5eba9718d867f42f148b740d061feeffd5a5e4f9 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 26 Sep 2012 01:53:42 +0000 Subject: Move hazard_size from the connection to the session. Each session can have a different count. Also a couple of bug fixes for growing hazard arrays. --- src/include/api.h | 2 +- src/include/btree.i | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/include') diff --git a/src/include/api.h b/src/include/api.h index 613fb48293d..51e28960a58 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -130,6 +130,7 @@ struct __wt_session_impl { * easily call a function to clear memory up to, but not including, the * hazard reference. */ + uint32_t hazard_size; /* Count of used hazard references */ u_int nhazard; #define WT_SESSION_CLEAR(s) memset(s, 0, WT_PTRDIFF(&(s)->hazard, s)) WT_HAZARD *hazard; /* Hazard reference array */ @@ -235,7 +236,6 @@ struct __wt_connection_impl { * WiredTiger allocates space for a fixed number of hazard references * in each thread of control. */ - uint32_t hazard_size; /* Count of used hazard references */ uint32_t hazard_max; /* Hazard array size */ WT_CACHE *cache; /* Page cache */ diff --git a/src/include/btree.i b/src/include/btree.i index b742ebbfc74..e5f01bcb2e6 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -344,7 +344,7 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) WT_CONNECTION_IMPL *conn; WT_HAZARD *hp; WT_SESSION_IMPL *s; - uint32_t i, session_cnt; + uint32_t i, hazard_size, session_cnt; conn = S2C(session); @@ -360,8 +360,8 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { if (!s->active) continue; - WT_ORDERED_READ(session_cnt, conn->hazard_size); - for (hp = s->hazard; hp < s->hazard + conn->hazard_size; ++hp) + WT_ORDERED_READ(hazard_size, s->hazard_size); + for (hp = s->hazard; hp < s->hazard + hazard_size; ++hp) if (hp->page == page) return (hp); } -- cgit v1.2.1 From 30cee0a04e383c85d01547ca894b7805fa9f68fa Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 26 Sep 2012 14:57:31 +1000 Subject: Update btree file count to only include open writable files. --- src/include/api.h | 2 +- src/include/cache.i | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/include') diff --git a/src/include/api.h b/src/include/api.h index 51e28960a58..5bc5f751163 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -214,7 +214,7 @@ struct __wt_connection_impl { /* Locked: library list */ TAILQ_HEAD(__wt_dlh_qh, __wt_dlh) dlhqh; - u_int btqcnt; /* Locked: btree count */ + u_int open_btree_count; /* Locked: open writable btree count */ u_int next_file_id; /* Locked: file ID counter */ /* diff --git a/src/include/cache.i b/src/include/cache.i index 2813a055414..ac4d46e00fa 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -57,9 +57,13 @@ __wt_eviction_page_check(WT_SESSION_IMPL *session, WT_PAGE *page) F_ISSET(session->btree, WT_BTREE_NO_EVICTION)) return (0); - /* Check the page's memory footprint. */ + /* + * Check the page's memory footprint - evict pages that take up more + * than their fair share of the cache. We define a fair share as + * approximately half the cache size per open writable btree handle. + */ if ((int64_t)page->memory_footprint > - conn->cache_size / (2 * (conn->btqcnt + 1))) + conn->cache_size / (2 * (conn->open_btree_count + 1))) return (1); /* -- cgit v1.2.1