summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKeith Bostic <keith.bostic@mongodb.com>2016-09-21 02:12:31 -0400
committerMichael Cahill <michael.cahill@mongodb.com>2016-09-21 16:12:31 +1000
commit1c2c9268e122c4da1e49f14b7b17f20183b1a991 (patch)
tree75955cff11a5005300c52d12b48f0b849833d026 /src
parent7652a55a509721813d94e09fba9733a72d2db788 (diff)
downloadmongo-1c2c9268e122c4da1e49f14b7b17f20183b1a991.tar.gz
WT-2923 heap-use-after-free on address in compaction (#3053)
We can't look at the WT_REF address without some kind of lock as the address can change underneath us. For example, if we take a copy of WT_REF.addr.addr while it's referencing an on-page cell, and then the page is evicted before we crack that address, we'll potentially read freed memory. Lock down the WT_REF before reading the address.
Diffstat (limited to 'src')
-rw-r--r--src/btree/bt_compact.c56
-rw-r--r--src/btree/bt_walk.c25
2 files changed, 51 insertions, 30 deletions
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index 7ba45f29b76..e005674762d 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -171,30 +171,64 @@ int
__wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
WT_BM *bm;
+ WT_DECL_RET;
size_t addr_size;
u_int type;
const uint8_t *addr;
- *skipp = false; /* Default to reading. */
- type = 0; /* Keep compiler quiet. */
+ /*
+ * Skip deleted pages, rewriting them doesn't seem useful; in a better
+ * world we'd write the parent to delete the page.
+ */
+ if (ref->state == WT_REF_DELETED) {
+ *skipp = true;
+ return (0);
+ }
- bm = S2BT(session)->bm;
+ *skipp = false; /* Default to reading */
/*
- * We aren't holding a hazard pointer, so we can't look at the page
- * itself, all we can look at is the WT_REF information. If there's no
- * address, the page isn't on disk, but we have to read internal pages
- * to walk the tree regardless; throw up our hands and read it.
+ * If the page is in-memory, we want to look at it (it may have been
+ * modified and written, and the current location is the interesting
+ * one in terms of compaction, not the original location).
+ *
+ * This test could be combined with the next one, but this is a cheap
+ * test and the next one is expensive.
*/
- __wt_ref_info(ref, &addr, &addr_size, &type);
- if (addr == NULL)
+ if (ref->state != WT_REF_DISK)
+ return (0);
+
+ /*
+ * There's nothing to prevent the WT_REF state from changing underfoot,
+ * which can change its address. For example, the WT_REF address might
+ * reference an on-page cell, and page eviction can free that memory.
+ * Lock the WT_REF so we can look at its address.
+ */
+ if (!__wt_atomic_casv32(&ref->state, WT_REF_DISK, WT_REF_LOCKED))
return (0);
/*
+ * The page is on disk, so there had better be an address; assert that
+ * fact, test at run-time to avoid the core dump.
+ *
* Internal pages must be read to walk the tree; ask the block-manager
* if it's useful to rewrite leaf pages, don't do the I/O if a rewrite
* won't help.
*/
- return (type == WT_CELL_ADDR_INT ? 0 :
- bm->compact_page_skip(bm, session, addr, addr_size, skipp));
+ __wt_ref_info(ref, &addr, &addr_size, &type);
+ WT_ASSERT(session, addr != NULL);
+ if (addr != NULL && type != WT_CELL_ADDR_INT) {
+ bm = S2BT(session)->bm;
+ ret = bm->compact_page_skip(
+ bm, session, addr, addr_size, skipp);
+ }
+
+ /*
+ * Reset the WT_REF state and push the change. The full-barrier isn't
+ * necessary, but it's better to keep pages in circulation than not.
+ */
+ ref->state = WT_REF_DISK;
+ WT_FULL_BARRIER();
+
+ return (ret);
}
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index dc1cf4e7f98..fb0d2296823 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -472,27 +472,14 @@ restart: /*
empty_internal = false;
} else if (LF_ISSET(WT_READ_COMPACT)) {
/*
- * Skip deleted pages, rewriting them doesn't
- * seem useful.
+ * Compaction has relatively complex tests to
+ * decide if a page can be skipped, call out
+ * to a helper function.
*/
- if (ref->state == WT_REF_DELETED)
+ WT_ERR(__wt_compact_page_skip(
+ session, ref, &skip));
+ if (skip)
break;
-
- /*
- * If the page is in-memory, we want to look at
- * it (it may have been modified and written,
- * and the current location is the interesting
- * one in terms of compaction, not the original
- * location). If the page isn't in-memory, test
- * if the page will help with compaction, don't
- * read it if we don't have to.
- */
- if (ref->state == WT_REF_DISK) {
- WT_ERR(__wt_compact_page_skip(
- session, ref, &skip));
- if (skip)
- break;
- }
} else {
/*
* Try to skip deleted pages visible to us.