summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ramon Fernandez <ramon@mongodb.com> 2016-04-15 10:35:54 +0100
committer: Ramon Fernandez <ramon@mongodb.com> 2016-04-15 10:36:41 +0100
commit: d0324043bc99a713961e1fca0ffc8ea4b124d959 (patch)
tree: 579d4772da3420d74d39b38e9f84ca244aa37f29
parent: 3edc84475b10154a76f268edb5e80ac6ca609411 (diff)
download: mongo-d0324043bc99a713961e1fca0ffc8ea4b124d959.tar.gz
Import wiredtiger-wiredtiger-mongodb-3.0.9-9-gf6286c2.tar.gz from wiredtiger branch mongodb-3.0
ref: 3dbc6c6..f6286c2

SERVER-22831 Low query rate with heavy cache pressure and an idle collection
SERVER-23457 WiredTiger changes for MongoDB 3.0.12
WT-2157 test/format corrupted cell failure
WT-2361 column-store starting record number error
WT-2451 Allow eviction of metadata
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_handle.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c | 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c | 9
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c | 3
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c | 14
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 27
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_schema07.py | 57
7 files changed, 92 insertions, 27 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index f9ddc7f2dd3..00452af549d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -254,7 +254,11 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
/* Eviction; the metadata file is never evicted. */
if (WT_IS_METADATA(btree->dhandle))
- F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION);
+ /*
+ * Special settings for metadata: skew eviction so metadata
+ * almost always stays in cache
+ */
+ btree->evict_priority = WT_EVICT_INT_SKEW;
else {
WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval));
if (cval.val)
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index ad8f0293108..4531538de7d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -135,7 +135,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
/*
* Forcibly evict pages that are too big.
*/
- if (force_attempts < 10 &&
+ if (!F_ISSET(session, WT_SESSION_NO_CACHE_CHECK) &&
+ force_attempts < 10 &&
__evict_force_check(session, page, flags)) {
++force_attempts;
ret = __wt_page_release_evict(session, ref);
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 6e0436bb01f..7f3620bb361 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1289,6 +1289,15 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_page_modify_set(session, right);
/*
+ * We perform insert splits concurrently with checkpoints, where the
+ * requirement is a checkpoint must include either the original page
+ * or both new pages. The page we're splitting is dirty, but that's
+ * insufficient: set the first dirty transaction to an impossibly old
+ * value so this page is not skipped by a checkpoint.
+ */
+ page->modify->first_dirty_txn = WT_TXN_FIRST;
+
+ /*
* We modified the page above, which will have set the first dirty
* transaction to the last transaction current running. However, the
* updates we installed may be older than that. Set the first dirty
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 5d5ad491145..aed58f4a3fe 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -164,8 +164,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
*/
if (!WT_PAGE_IS_INTERNAL(page) &&
F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) &&
- WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn) &&
- mod->rec_result != WT_PM_REC_REWRITE) {
+ WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn)) {
__wt_page_modify_set(session, page);
continue;
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 2ae3dd6b9a5..47917289503 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -1213,7 +1213,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
continue;
/* Pages we no longer need (clean or dirty), are found money. */
- if (__wt_page_is_empty(page))
+ if (__wt_page_is_empty(page) ||
+ F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
+ page->read_gen == WT_READGEN_OLDEST)
goto fast;
/* Optionally ignore clean pages. */
@@ -1225,14 +1227,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
* eviction, skip anything that isn't marked.
*/
if (LF_ISSET(WT_EVICT_PASS_WOULD_BLOCK) &&
- page->memory_footprint < btree->maxmempage &&
- page->read_gen != WT_READGEN_OLDEST)
+ page->memory_footprint < (btree->maxmempage * 8) / 10)
continue;
/* Limit internal pages to 50% unless we get aggressive. */
if (WT_PAGE_IS_INTERNAL(page) &&
- ++internal_pages > WT_EVICT_WALK_PER_FILE / 2 &&
- !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE))
+ !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE) &&
+ internal_pages >= (int)(evict - start) / 2)
continue;
/*
@@ -1282,6 +1283,9 @@ fast: /* If the page can't be evicted, give up. */
__evict_init_candidate(session, evict, ref);
++evict;
+ if (WT_PAGE_IS_INTERNAL(page))
+ ++internal_pages;
+
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"select: %p, size %" PRIu64, page, page->memory_footprint));
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index a2a8a330c1d..7644eedc7e1 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -29,7 +29,6 @@ typedef struct {
/* Track whether all changes to the page are written. */
uint64_t max_txn;
- uint64_t skipped_txn;
uint32_t orig_write_gen;
/*
@@ -281,7 +280,7 @@ typedef struct {
} WT_RECONCILE;
static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool);
-static void __rec_cell_build_addr(
+static void __rec_cell_build_addr(WT_SESSION_IMPL *,
WT_RECONCILE *, const void *, size_t, u_int, uint64_t);
static int __rec_cell_build_int_key(WT_SESSION_IMPL *,
WT_RECONCILE *, const void *, size_t, bool *);
@@ -719,12 +718,6 @@ __rec_write_init(WT_SESSION_IMPL *session,
/* Save the page's write generation before reading the page. */
WT_ORDERED_READ(r->orig_write_gen, page->modify->write_gen);
- /*
- * Running transactions may update the page after we write it, so
- * this is the highest ID we can be confident we will see.
- */
- r->skipped_txn = S2C(session)->txn_global.last_running;
-
return (0);
}
@@ -901,9 +894,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
max_txn = txnid;
if (WT_TXNID_LT(txnid, min_txn))
min_txn = txnid;
- if (WT_TXNID_LT(txnid, r->skipped_txn) &&
- !__wt_txn_visible_all(session, txnid))
- r->skipped_txn = txnid;
/*
* Record whether any updates were skipped on the way to finding
@@ -3373,7 +3363,8 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
val->cell_len = 0;
val->len = val->buf.size;
} else
- __rec_cell_build_addr(r, addr->addr, addr->size,
+ __rec_cell_build_addr(session, r,
+ addr->addr, addr->size,
__rec_vtype(addr), ref->key.recno);
WT_CHILD_RELEASE_ERR(session, hazard, ref);
@@ -3419,7 +3410,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Build the value cell. */
addr = &multi->addr;
- __rec_cell_build_addr(r,
+ __rec_cell_build_addr(session, r,
addr->addr, addr->size, __rec_vtype(addr), r->recno);
/* Boundary: split or write the page. */
@@ -4222,7 +4213,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
vtype = state == WT_CHILD_PROXY ?
WT_CELL_ADDR_DEL : (u_int)vpack->raw;
}
- __rec_cell_build_addr(r, p, size, vtype, 0);
+ __rec_cell_build_addr(session, r, p, size, vtype, 0);
WT_CHILD_RELEASE_ERR(session, hazard, ref);
/*
@@ -4308,7 +4299,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
r->cell_zero = false;
addr = &multi->addr;
- __rec_cell_build_addr(
+ __rec_cell_build_addr(session,
r, addr->addr, addr->size, __rec_vtype(addr), 0);
/* Boundary: split or write the page. */
@@ -5086,8 +5077,6 @@ err: __wt_scr_free(session, &tkey);
* discarded.
*/
if (r->leave_dirty) {
- mod->first_dirty_txn = r->skipped_txn;
-
btree->modified = 1;
WT_FULL_BARRIER();
} else {
@@ -5394,13 +5383,15 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
* on the page.
*/
static void
-__rec_cell_build_addr(WT_RECONCILE *r,
+__rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
const void *addr, size_t size, u_int cell_type, uint64_t recno)
{
WT_KV *val;
val = &r->v;
+ WT_ASSERT(session, size != 0 || cell_type == WT_CELL_ADDR_DEL);
+
/*
* We don't check the address size because we can't store an address on
* an overflow page: if the address won't fit, the overflow page's
diff --git a/src/third_party/wiredtiger/test/suite/test_schema07.py b/src/third_party/wiredtiger/test/suite/test_schema07.py
new file mode 100644
index 00000000000..67a46fb8cf0
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_schema07.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_schema07.py
+# Test that long-running tests don't fill the cache with metadata
+class test_schema07(wttest.WiredTigerTestCase):
+ tablename = 'table:test_schema07'
+
+ def setUpConnectionOpen(self, dir):
+ wtopen_args = 'create,cache_size=10MB'
+ conn = wiredtiger.wiredtiger_open(dir, wtopen_args)
+ self.pr(`conn`)
+ return conn
+
+ @wttest.longtest("Creating many tables shouldn't fill the cache")
+ def test_many_tables(self):
+ s = self.session
+ # We have a 10MB cache, metadata is (well) over 512B per table,
+ # if we can create 20K tables, something must be cleaning up.
+ for i in xrange(20000):
+ uri = '%s-%06d' % (self.tablename, i)
+ s.create(uri)
+ c = s.open_cursor(uri)
+ # This will block if the metadata fills the cache
+ c["key"] = "value"
+ c.close()
+ self.session.drop(uri)
+
+if __name__ == '__main__':
+ wttest.run()