summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Keith Bostic <keith@wiredtiger.com> 2015-12-16 12:37:37 -0500
committer Keith Bostic <keith@wiredtiger.com> 2015-12-16 12:37:37 -0500
commit 6c144b89c56b1fd39c53ca3f1fa878155bc38ef8 (patch)
tree 69e3421995b4fb2c8dde9ba1caf3a9022780b869
parent 6dd42eeb7bf38de24f9888e065dd59f82d7a3d7e (diff)
download mongo-6c144b89c56b1fd39c53ca3f1fa878155bc38ef8.tar.gz
WT-2290: when re-writing a multi-block page for compaction, with
the goal of moving the block to a new location in the file, the reconciliation code that avoids re-writing blocks which already exist on disk was working as designed: it skipped the writes, so the blocks never moved and the compaction failed. Turn the compaction success/failure boolean into three states (not-running, running, and running-successfully), then change reconciliation to ignore block reuse if compaction is running.
-rw-r--r-- src/btree/bt_compact.c 3
-rw-r--r-- src/include/session.h 5
-rw-r--r-- src/reconcile/rec_write.c 9
-rw-r--r-- src/session/session_compact.c 8
-rw-r--r-- test/suite/test_compact02.py 10
5 files changed, 23 insertions, 12 deletions
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index 8044d4f852d..45199fd90fe 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -139,7 +139,8 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
if (skip)
continue;
- session->compaction = true;
+ session->compact_state = WT_COMPACT_SUCCESS;
+
/* Rewrite the page: mark the page and tree dirty. */
WT_ERR(__wt_page_modify_init(session, ref->page));
__wt_page_modify_set(session, ref->page);
diff --git a/src/include/session.h b/src/include/session.h
index 5c3bcfb8ed0..b488f62c8d5 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -74,7 +74,10 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
TAILQ_HEAD(__cursors, __wt_cursor) cursors;
WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */
- WT_COMPACT *compact; /* Compact state */
+
+ WT_COMPACT *compact; /* Compaction information */
+ enum { WT_COMPACT_NONE=0,
+ WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state;
/*
* Lookaside table cursor, sweep and eviction worker threads only.
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index b31e98422f5..4ce3d63edfc 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -3265,7 +3265,14 @@ supd_check_complete:
memset(WT_BLOCK_HEADER_REF(dsk), 0, btree->block_header);
bnd->cksum = __wt_cksum(buf->data, buf->size);
- if (mod->rec_result == WT_PM_REC_MULTIBLOCK &&
+ /*
+ * One last check: don't reuse blocks if compacting, the reason
+ * for compaction is to move blocks to different locations. We
+ * do this check after calculating the checksums, hopefully the
+ * next write can be skipped.
+ */
+ if (session->compact_state == WT_COMPACT_NONE &&
+ mod->rec_result == WT_PM_REC_MULTIBLOCK &&
mod->mod_multi_entries > bnd_slot) {
multi = &mod->mod_multi[bnd_slot];
if (multi->size == bnd->size &&
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index 456fcd3ce03..8a5b741c0c5 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -172,12 +172,12 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
for (i = 0; i < 100; ++i) {
WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg));
- session->compaction = false;
+ session->compact_state = WT_COMPACT_RUNNING;
WT_WITH_SCHEMA_LOCK(session,
ret = __wt_schema_worker(
session, uri, __wt_compact, NULL, cfg, 0));
WT_ERR(ret);
- if (!session->compaction)
+ if (session->compact_state != WT_COMPACT_SUCCESS)
break;
WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg));
@@ -185,7 +185,9 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
WT_ERR(__session_compact_check_timeout(session, start_time));
}
-err: __wt_scr_free(session, &t);
+err: session->compact_state = WT_COMPACT_NONE;
+
+ __wt_scr_free(session, &t);
return (ret);
}
diff --git a/test/suite/test_compact02.py b/test/suite/test_compact02.py
index f4257a52296..1481eebb881 100644
--- a/test/suite/test_compact02.py
+++ b/test/suite/test_compact02.py
@@ -53,7 +53,7 @@ class test_compact02(wttest.WiredTigerTestCase):
# 1. Create a table with the data, alternating record size.
# 2. Checkpoint and get stats on the table to confirm the size.
# 3. Delete the half of the records with the larger record size.
- # 4. Reopen the connection to force the file to disk.
+ # 4. Checkpoint to make the deleted records available for reuse.
# 5. Get stats on table.
# 6. Call compact.
# 7. Get stats on compacted table.
@@ -115,11 +115,9 @@ class test_compact02(wttest.WiredTigerTestCase):
c.close()
self.pr('Removed total ' + str((count * 9666) / mb) + 'MB')
- # 4. Reopen the connection to force the file to disk (if we leave dirty
- # blocks in the cache, it can affect how compact works depending on how
- # and when those blocks are written, because they're being written best
- # fit, not first-fit, as compaction does).
- self.reopen_conn()
+ # 4. Checkpoint to make the deleted records available for reuse, so
+ # compaction can be successful.
+ self.session.checkpoint()
# 5. Get stats on table.
sz = self.get_size()