summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/block/block_compact.c
diff options
context:
space:
mode:
authorEtienne Petrel <etienne.petrel@mongodb.com>2021-10-19 06:19:36 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-19 07:35:46 +0000
commit7fc844336b4650cea78c19990a14bb453b6f7b14 (patch)
treedf51ff34c1d838bcc942f84b9f46c8749908734a /src/third_party/wiredtiger/src/block/block_compact.c
parent1da364de3bee98e82fe180dad1e0983d4b12f9a2 (diff)
downloadmongo-7fc844336b4650cea78c19990a14bb453b6f7b14.tar.gz
Import wiredtiger: 70ab26de2ab263fabab39114aee583f632f4e088 from branch mongodb-master
ref: bfcac76ea0..70ab26de2a for: 5.2.0 WT-6001 Avoid reading the page into cache if it needs to be rewritten
Diffstat (limited to 'src/third_party/wiredtiger/src/block/block_compact.c')
-rw-r--r--src/third_party/wiredtiger/src/block/block_compact.c154
1 files changed, 139 insertions, 15 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c
index f6be40cfa78..28076e856b4 100644
--- a/src/third_party/wiredtiger/src/block/block_compact.c
+++ b/src/third_party/wiredtiger/src/block/block_compact.c
@@ -24,9 +24,10 @@ __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
/* Reset the compaction state information. */
block->compact_pct_tenths = 0;
+ block->compact_blocks_moved = 0;
+ block->compact_cache_pages_dealt = 0;
block->compact_pages_reviewed = 0;
block->compact_pages_skipped = 0;
- block->compact_pages_written = 0;
return (0);
}
@@ -51,6 +52,32 @@ __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
}
/*
+ * __wt_block_compact_progress --
+ * Output compact progress message.
+ */
+void
+__wt_block_compact_progress(WT_SESSION_IMPL *session, WT_BLOCK *block, u_int *msg_countp)
+{
+ struct timespec cur_time;
+ uint64_t time_diff;
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS))
+ return;
+
+ __wt_epoch(session, &cur_time);
+
+ /* Log one progress message every twenty seconds. */
+ time_diff = WT_TIMEDIFF_SEC(cur_time, session->compact->begin);
+ if (time_diff / WT_PROGRESS_MSG_PERIOD > *msg_countp) {
+ ++*msg_countp;
+ __wt_verbose(session, WT_VERB_COMPACT_PROGRESS,
+ " compacting %s for %" PRIu64 " seconds; reviewed %" PRIu64 " pages, skipped %" PRIu64
+ " pages, cache pages evicted %" PRIu64 ", on-disk pages moved %" PRIu64,
+ block->name, time_diff, block->compact_pages_reviewed, block->compact_pages_skipped,
+ block->compact_cache_pages_dealt, block->compact_blocks_moved);
+ }
+}
+/*
* __wt_block_compact_skip --
* Return if compaction will shrink the file.
*/
@@ -117,7 +144,7 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp)
"%s: total reviewed %" PRIu64 " pages, total skipped %" PRIu64 " pages, total wrote %" PRIu64
" pages",
block->name, block->compact_pages_reviewed, block->compact_pages_skipped,
- block->compact_pages_written);
+ block->compact_cache_pages_dealt);
__wt_verbose(session, WT_VERB_COMPACT,
"%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first 80%% of the file",
block->name, (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty);
@@ -136,27 +163,22 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp)
}
/*
- * __wt_block_compact_page_skip --
+ * __compact_page_skip --
* Return if writing a particular page will shrink the file.
*/
-int
-__wt_block_compact_page_skip(
- WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp)
+static void
+__compact_page_skip(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, uint32_t size, bool *skipp)
{
WT_EXT *ext;
WT_EXTLIST *el;
- wt_off_t limit, offset;
- uint32_t checksum, objectid, size;
+ wt_off_t limit;
*skipp = true; /* Return a default skip. */
- /* Crack the cookie. */
- WT_RET(__wt_block_addr_unpack(
- session, block, addr, addr_size, &objectid, &offset, &size, &checksum));
-
/*
* If this block is in the chosen percentage of the file and there's a block on the available
- * list that's appears before that percentage of the file, rewrite the block. Checking the
+ * list that appears before that percentage of the file, rewrite the block. Checking the
* available list is necessary (otherwise writing the block would extend the file), but there's
* an obvious race if the file is sufficiently busy.
*/
@@ -174,17 +196,117 @@ __wt_block_compact_page_skip(
}
}
__wt_spin_unlock(session, &block->live_lock);
+}
+
+/*
+ * __wt_block_compact_page_skip --
+ * Return if writing a particular page will shrink the file.
+ */
+int
+__wt_block_compact_page_skip(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp)
+{
+ wt_off_t offset;
+ uint32_t size, checksum, objectid;
+
+ WT_UNUSED(addr_size);
+ *skipp = true; /* Return a default skip. */
+ offset = 0;
+
+ /* Crack the cookie. */
+ WT_RET(__wt_block_addr_unpack(
+ session, block, addr, addr_size, &objectid, &offset, &size, &checksum));
+
+ __compact_page_skip(session, block, offset, size, skipp);
++block->compact_pages_reviewed;
if (*skipp)
++block->compact_pages_skipped;
else
- ++block->compact_pages_written;
+ ++block->compact_cache_pages_dealt;
return (0);
}
/*
+ * __wt_block_compact_page_rewrite --
+ * Rewrite a page if it will shrink the file.
+ */
+int
+__wt_block_compact_page_rewrite(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *skipp)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ wt_off_t offset, new_offset;
+ uint32_t size, checksum, objectid;
+ uint8_t *endp;
+ bool discard_block;
+
+ *skipp = true; /* Return a default skip. */
+ new_offset = 0; /* -Werror=maybe-uninitialized */
+
+ discard_block = false;
+
+ WT_ERR(__wt_block_addr_unpack(
+ session, block, addr, *addr_sizep, &objectid, &offset, &size, &checksum));
+
+ /* Check if the block is worth rewriting. */
+ __compact_page_skip(session, block, offset, size, skipp);
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT) ||
+ WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS)) {
+ ++block->compact_pages_reviewed;
+ if (*skipp)
+ ++block->compact_pages_skipped;
+ else
+ ++block->compact_blocks_moved;
+ }
+ if (*skipp)
+ return (0);
+
+ /* Read the block. */
+ WT_ERR(__wt_scr_alloc(session, size, &tmp));
+ WT_ERR(__wt_read(session, block->fh, offset, size, tmp->mem));
+
+ /* Allocate a replacement block. */
+ WT_ERR(__wt_block_ext_prealloc(session, 5));
+ __wt_spin_lock(session, &block->live_lock);
+ ret = __wt_block_alloc(session, block, &new_offset, (wt_off_t)size);
+ __wt_spin_unlock(session, &block->live_lock);
+ WT_ERR(ret);
+ discard_block = true;
+
+ /* Write the block. */
+ WT_ERR(__wt_write(session, block->fh, new_offset, size, tmp->mem));
+
+ /* Free the original block. */
+ __wt_spin_lock(session, &block->live_lock);
+ ret = __wt_block_off_free(session, block, objectid, offset, (wt_off_t)size);
+ __wt_spin_unlock(session, &block->live_lock);
+ WT_ERR(ret);
+
+ /* Build the returned address cookie. */
+ endp = addr;
+ WT_ERR(__wt_block_addr_pack(block, &endp, objectid, new_offset, size, checksum));
+ *addr_sizep = WT_PTRDIFF(endp, addr);
+
+ WT_STAT_CONN_INCR(session, block_write);
+ WT_STAT_CONN_INCRV(session, block_byte_write, size);
+
+ discard_block = false;
+
+err:
+ if (discard_block) {
+ __wt_spin_lock(session, &block->live_lock);
+ WT_TRET(__wt_block_off_free(session, block, objectid, new_offset, (wt_off_t)size));
+ __wt_spin_unlock(session, &block->live_lock);
+ }
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
* __block_dump_bucket_stat --
* Dump out the information about available and used blocks in the given bucket (part of the
* file).
@@ -237,8 +359,10 @@ __block_dump_file_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start)
session, WT_VERB_COMPACT, "pages reviewed: %" PRIu64, block->compact_pages_reviewed);
__wt_verbose(
session, WT_VERB_COMPACT, "pages skipped: %" PRIu64, block->compact_pages_skipped);
+ __wt_verbose(session, WT_VERB_COMPACT,
+ "cache pages read/flushed out of the cache: %" PRIu64, block->compact_cache_pages_dealt);
__wt_verbose(
- session, WT_VERB_COMPACT, "pages written: %" PRIu64, block->compact_pages_written);
+ session, WT_VERB_COMPACT, "blocks moved : %" PRIu64, block->compact_blocks_moved);
}
__wt_verbose(session, WT_VERB_COMPACT,