summary refs log tree commit diff
diff options
context:
space:
mode:
author: Chenhao Qu <chenhao.qu@mongodb.com> 2022-11-02 11:57:19 +1100
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-11-02 02:00:20 +0000
commit: 8eadabfc1c8938ee05eff4cfe5ea6e978afae7a3 (patch)
tree: 83fdc24b6a1b15490ac3b82f391d60c70741151c
parent: 9bcb822efc9c0516d882984304822e216fa2ea60 (diff)
download: mongo-8eadabfc1c8938ee05eff4cfe5ea6e978afae7a3.tar.gz
Import wiredtiger: 8e8d4be3138eba73b4d530b832dcf2cc545e2a64 from branch mongodb-master
ref: 384ced991f..8e8d4be313 for: 6.2.0-rc0 WT-10030 Mark internal page with deleted pages as dirty when reading it into memory
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c 27
-rwxr-xr-x src/third_party/wiredtiger/test/suite/hook_tiered.py 1
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_truncate19.py 119
4 files changed, 148 insertions, 1 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 4091de83cb5..4058cab8ee0 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-master",
- "commit": "384ced991fa662369dbc0eeaa997e3ef817425c6"
+ "commit": "8e8d4be3138eba73b4d530b832dcf2cc545e2a64"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index d4de7ab603d..e6ca2a8cb4a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -638,6 +638,10 @@ static int
__inmem_col_int_init_ref(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *home, uint32_t hint,
void *addr, uint64_t recno, bool internal, bool deleted, WT_PAGE_DELETED *page_del)
{
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+
ref->home = home;
ref->pindex_hint = hint;
ref->addr = addr;
@@ -657,6 +661,16 @@ __inmem_col_int_init_ref(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *home, u
*ref->page_del = *page_del;
}
WT_REF_SET_STATE(ref, WT_REF_DELETED);
+
+ /*
+ * If the tree is already dirty and so will be written, mark the page dirty. (We want to
+ * free the deleted pages, but if the handle is read-only or if the application never
+ * modifies the tree, we're not able to do so.)
+ */
+ if (btree->modified) {
+ WT_RET(__wt_page_modify_init(session, home));
+ __wt_page_only_modify_set(session, home);
+ }
}
return (0);
@@ -811,6 +825,7 @@ __inmem_col_var(
static int
__inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
{
+ WT_BTREE *btree;
WT_CELL_UNPACK_ADDR unpack;
WT_DECL_ITEM(current);
WT_DECL_RET;
@@ -819,6 +834,8 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
uint32_t hint;
bool overflow_keys;
+ btree = S2BT(session);
+
WT_RET(__wt_scr_alloc(session, 0, &current));
/*
@@ -880,6 +897,16 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
}
WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ /*
+ * If the tree is already dirty and so will be written, mark the page dirty. (We want to
+ * free the deleted pages, but if the handle is read-only or if the application never
+ * modifies the tree, we're not able to do so.)
+ */
+ if (btree->modified) {
+ WT_ERR(__wt_page_modify_init(session, page));
+ __wt_page_only_modify_set(session, page);
+ }
+
ref->addr = unpack.cell;
++refp;
break;
diff --git a/src/third_party/wiredtiger/test/suite/hook_tiered.py b/src/third_party/wiredtiger/test/suite/hook_tiered.py
index 0e6e2bce286..98042792da9 100755
--- a/src/third_party/wiredtiger/test/suite/hook_tiered.py
+++ b/src/third_party/wiredtiger/test/suite/hook_tiered.py
@@ -331,6 +331,7 @@ class TieredHookCreator(wthooks.WiredTigerHookCreator):
"test_truncate16.test_truncate16",
"test_truncate18.test_truncate18",
"test_truncate15.test_truncate15",
+ "test_truncate19.test_truncate19",
"test_txn22.test_corrupt_meta",
"test_verbose01.test_verbose_single",
"test_verbose02.test_verbose_single",
diff --git a/src/third_party/wiredtiger/test/suite/test_truncate19.py b/src/third_party/wiredtiger/test/suite/test_truncate19.py
new file mode 100644
index 00000000000..1afda92e738
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_truncate19.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+import os, wttest, suite_random
+from wiredtiger import stat
+from wtdataset import SimpleDataSet
+from wtscenario import make_scenarios
+
+# test_truncate19.py
+#
+# Test to mimic oplog workload in MongoDB. Ensure the deleted pages are
+# cleaned up on disk and we are not using excessive disk space.
+class test_truncate19(wttest.WiredTigerTestCase):
+    """Oplog-style workload: truncate at the front, append at the back.
+
+    After each truncate and checkpoint the test asserts that the table's
+    file on disk stays below a fixed size limit.
+    """
+
+    conn_config = 'statistics=(all)'
+
+    format_values = [
+        ('string_row', dict(key_format='S', value_format='S')),
+    ]
+    scenarios = make_scenarios(format_values)
+
+    def append_rows(self, uri, ds, start_row, nrows, value):
+        """Store value under keys start_row through start_row + nrows.
+
+        The upper bound is inclusive, so nrows + 1 keys are written. The
+        transaction is committed and restarted after every even key, which
+        spreads the writes over many small transactions.
+        """
+        cursor = self.session.open_cursor(uri)
+        self.session.begin_transaction()
+        for i in range(start_row, start_row + nrows + 1):
+            cursor[ds.key(i)] = value
+            if i % 2 == 0:
+                self.session.commit_transaction()
+                self.session.begin_transaction()
+        self.session.commit_transaction()
+        cursor.close()
+
+    def do_truncate(self, ds, start_row, nrows):
+        """Truncate up to key start_row + nrows in a single transaction.
+
+        No start cursor is passed to truncate; only the stop cursor is set.
+        """
+        self.session.begin_transaction()
+        hicursor = self.session.open_cursor(ds.uri)
+        hicursor.set_key(ds.key(start_row + nrows))
+        self.session.truncate(None, None, hicursor, None)
+        self.session.commit_transaction()
+        # Release the stop cursor so repeated calls do not pile up open cursors.
+        hicursor.close()
+
+    def test_truncate19(self):
+        """Truncate and re-append repeatedly while checking the file size."""
+        uri = 'table:oplog'
+        nrows = 1000000
+        # Rows truncated from the front and appended at the back each round.
+        trunc_rows = 10000
+        # Upper bound on the size of the table file, in bytes (600M).
+        max_file_size = 600000000
+
+        # Create a table.
+        ds = SimpleDataSet(self, uri, 0, key_format=self.key_format, value_format=self.value_format)
+        ds.populate()
+        ds_dummy = SimpleDataSet(self, 'table:dummy', 0, key_format=self.key_format, value_format=self.value_format)
+        ds_dummy.populate()
+
+        value_a = "aaaaa" * 100
+
+        # Write some data
+        self.append_rows(uri, ds, 1, nrows, value_a)
+        self.session.checkpoint()
+
+        # Reopen the database.
+        self.reopen_conn()
+
+        # Session for checkpoint
+        session2 = self.conn.open_session()
+        # Session for long running transaction, to make truncate not globally visible
+        session3 = self.conn.open_session()
+
+        start_num = 1
+        end_num = nrows
+        for _ in range(1, 50):
+            # Start a long running transaction
+            session3.begin_transaction()
+
+            self.do_truncate(ds, start_num, trunc_rows)
+
+            # Check stats to make sure we fast-deleted at least one page.
+            stat_cursor = self.session.open_cursor('statistics:', None, None)
+            fastdelete_pages = stat_cursor[stat.conn.rec_page_delete_fast][2]
+            # Close the statistics cursor before the next round opens a new one.
+            stat_cursor.close()
+
+            self.assertGreater(fastdelete_pages, 0)
+
+            # Take a checkpoint.
+            session2.checkpoint()
+            # Ensure the datasize is smaller than 600M
+            self.assertLess(os.path.getsize("oplog.wt"), max_file_size)
+            session3.rollback_transaction()
+
+            self.append_rows(uri, ds, end_num, trunc_rows, value_a)
+
+            end_num += trunc_rows
+            start_num += trunc_rows
+
+        session2.checkpoint()
+        # Ensure the datasize is smaller than 600M
+        self.assertLess(os.path.getsize("oplog.wt"), max_file_size)
+
+# Hand control to wttest.run() when this file is executed directly as a script.
+if __name__ == '__main__':
+ wttest.run()