diff options

| field | value | date |
|---|---|---|
| author | Ramon Fernandez <ramon@mongodb.com> | 2016-04-15 10:35:54 +0100 |
| committer | Ramon Fernandez <ramon@mongodb.com> | 2016-04-15 10:36:41 +0100 |
| commit | d0324043bc99a713961e1fca0ffc8ea4b124d959 (patch) | |
| tree | 579d4772da3420d74d39b38e9f84ca244aa37f29 | |
| parent | 3edc84475b10154a76f268edb5e80ac6ca609411 (diff) | |
| download | mongo-d0324043bc99a713961e1fca0ffc8ea4b124d959.tar.gz | |
Import wiredtiger-wiredtiger-mongodb-3.0.9-9-gf6286c2.tar.gz from wiredtiger branch mongodb-3.0
ref: 3dbc6c6..f6286c2
SERVER-22831 Low query rate with heavy cache pressure and an idle collection
SERVER-23457 WiredTiger changes for MongoDB 3.0.12
WT-2157 test/format corrupted cell failure
WT-2361 column-store starting record number error
WT-2451 Allow eviction of metadata
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_handle.c | 6 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_page.c | 3 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_split.c | 9 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_sync.c | 3 |
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_lru.c | 14 |
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 27 |
-rw-r--r-- | src/third_party/wiredtiger/test/suite/test_schema07.py | 57 |
7 files changed, 92 insertions, 27 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index f9ddc7f2dd3..00452af549d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -254,7 +254,11 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) /* Eviction; the metadata file is never evicted. */ if (WT_IS_METADATA(btree->dhandle)) - F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); + /* + * Special settings for metadata: skew eviction so metadata + * almost always stays in cache + */ + btree->evict_priority = WT_EVICT_INT_SKEW; else { WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval)); if (cval.val) diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index ad8f0293108..4531538de7d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -135,7 +135,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags /* * Forcibly evict pages that are too big. */ - if (force_attempts < 10 && + if (!F_ISSET(session, WT_SESSION_NO_CACHE_CHECK) && + force_attempts < 10 && __evict_force_check(session, page, flags)) { ++force_attempts; ret = __wt_page_release_evict(session, ref); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 6e0436bb01f..7f3620bb361 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -1289,6 +1289,15 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) __wt_page_modify_set(session, right); /* + * We perform insert splits concurrently with checkpoints, where the + * requirement is a checkpoint must include either the original page + * or both new pages. 
The page we're splitting is dirty, but that's + * insufficient: set the first dirty transaction to an impossibly old + * value so this page is not skipped by a checkpoint. + */ + page->modify->first_dirty_txn = WT_TXN_FIRST; + + /* * We modified the page above, which will have set the first dirty * transaction to the last transaction current running. However, the * updates we installed may be older than that. Set the first dirty diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 5d5ad491145..aed58f4a3fe 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -164,8 +164,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) */ if (!WT_PAGE_IS_INTERNAL(page) && F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) && - WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn) && - mod->rec_result != WT_PM_REC_REWRITE) { + WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn)) { __wt_page_modify_set(session, page); continue; } diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 2ae3dd6b9a5..47917289503 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -1213,7 +1213,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) continue; /* Pages we no longer need (clean or dirty), are found money. */ - if (__wt_page_is_empty(page)) + if (__wt_page_is_empty(page) || + F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || + page->read_gen == WT_READGEN_OLDEST) goto fast; /* Optionally ignore clean pages. */ @@ -1225,14 +1227,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) * eviction, skip anything that isn't marked. 
*/ if (LF_ISSET(WT_EVICT_PASS_WOULD_BLOCK) && - page->memory_footprint < btree->maxmempage && - page->read_gen != WT_READGEN_OLDEST) + page->memory_footprint < (btree->maxmempage * 8) / 10) continue; /* Limit internal pages to 50% unless we get aggressive. */ if (WT_PAGE_IS_INTERNAL(page) && - ++internal_pages > WT_EVICT_WALK_PER_FILE / 2 && - !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE)) + !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE) && + internal_pages >= (int)(evict - start) / 2) continue; /* @@ -1282,6 +1283,9 @@ fast: /* If the page can't be evicted, give up. */ __evict_init_candidate(session, evict, ref); ++evict; + if (WT_PAGE_IS_INTERNAL(page)) + ++internal_pages; + WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER, "select: %p, size %" PRIu64, page, page->memory_footprint)); } diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index a2a8a330c1d..7644eedc7e1 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -29,7 +29,6 @@ typedef struct { /* Track whether all changes to the page are written. */ uint64_t max_txn; - uint64_t skipped_txn; uint32_t orig_write_gen; /* @@ -281,7 +280,7 @@ typedef struct { } WT_RECONCILE; static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool); -static void __rec_cell_build_addr( +static void __rec_cell_build_addr(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, u_int, uint64_t); static int __rec_cell_build_int_key(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, bool *); @@ -719,12 +718,6 @@ __rec_write_init(WT_SESSION_IMPL *session, /* Save the page's write generation before reading the page. */ WT_ORDERED_READ(r->orig_write_gen, page->modify->write_gen); - /* - * Running transactions may update the page after we write it, so - * this is the highest ID we can be confident we will see. 
- */ - r->skipped_txn = S2C(session)->txn_global.last_running; - return (0); } @@ -901,9 +894,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, max_txn = txnid; if (WT_TXNID_LT(txnid, min_txn)) min_txn = txnid; - if (WT_TXNID_LT(txnid, r->skipped_txn) && - !__wt_txn_visible_all(session, txnid)) - r->skipped_txn = txnid; /* * Record whether any updates were skipped on the way to finding @@ -3373,7 +3363,8 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) val->cell_len = 0; val->len = val->buf.size; } else - __rec_cell_build_addr(r, addr->addr, addr->size, + __rec_cell_build_addr(session, r, + addr->addr, addr->size, __rec_vtype(addr), ref->key.recno); WT_CHILD_RELEASE_ERR(session, hazard, ref); @@ -3419,7 +3410,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* Build the value cell. */ addr = &multi->addr; - __rec_cell_build_addr(r, + __rec_cell_build_addr(session, r, addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ @@ -4222,7 +4213,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) vtype = state == WT_CHILD_PROXY ? WT_CELL_ADDR_DEL : (u_int)vpack->raw; } - __rec_cell_build_addr(r, p, size, vtype, 0); + __rec_cell_build_addr(session, r, p, size, vtype, 0); WT_CHILD_RELEASE_ERR(session, hazard, ref); /* @@ -4308,7 +4299,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = false; addr = &multi->addr; - __rec_cell_build_addr( + __rec_cell_build_addr(session, r, addr->addr, addr->size, __rec_vtype(addr), 0); /* Boundary: split or write the page. */ @@ -5086,8 +5077,6 @@ err: __wt_scr_free(session, &tkey); * discarded. */ if (r->leave_dirty) { - mod->first_dirty_txn = r->skipped_txn; - btree->modified = 1; WT_FULL_BARRIER(); } else { @@ -5394,13 +5383,15 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, * on the page. 
*/ static void -__rec_cell_build_addr(WT_RECONCILE *r, +__rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *addr, size_t size, u_int cell_type, uint64_t recno) { WT_KV *val; val = &r->v; + WT_ASSERT(session, size != 0 || cell_type == WT_CELL_ADDR_DEL); + /* * We don't check the address size because we can't store an address on * an overflow page: if the address won't fit, the overflow page's diff --git a/src/third_party/wiredtiger/test/suite/test_schema07.py b/src/third_party/wiredtiger/test/suite/test_schema07.py new file mode 100644 index 00000000000..67a46fb8cf0 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_schema07.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest + +# test_schema07.py +# Test that long-running tests don't fill the cache with metadata +class test_schema07(wttest.WiredTigerTestCase): + tablename = 'table:test_schema07' + + def setUpConnectionOpen(self, dir): + wtopen_args = 'create,cache_size=10MB' + conn = wiredtiger.wiredtiger_open(dir, wtopen_args) + self.pr(`conn`) + return conn + + @wttest.longtest("Creating many tables shouldn't fill the cache") + def test_many_tables(self): + s = self.session + # We have a 10MB cache, metadata is (well) over 512B per table, + # if we can create 20K tables, something must be cleaning up. + for i in xrange(20000): + uri = '%s-%06d' % (self.tablename, i) + s.create(uri) + c = s.open_cursor(uri) + # This will block if the metadata fills the cache + c["key"] = "value" + c.close() + self.session.drop(uri) + +if __name__ == '__main__': + wttest.run() |