diff options
author | Etienne Petrel <etienne.petrel@mongodb.com> | 2021-10-19 06:19:36 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-19 07:35:46 +0000 |
commit | 7fc844336b4650cea78c19990a14bb453b6f7b14 (patch) | |
tree | df51ff34c1d838bcc942f84b9f46c8749908734a /src/third_party/wiredtiger/src/block/block_compact.c | |
parent | 1da364de3bee98e82fe180dad1e0983d4b12f9a2 (diff) | |
download | mongo-7fc844336b4650cea78c19990a14bb453b6f7b14.tar.gz |
Import wiredtiger: 70ab26de2ab263fabab39114aee583f632f4e088 from branch mongodb-master
ref: bfcac76ea0..70ab26de2a
for: 5.2.0
WT-6001 Avoid reading the page into cache if it needs to be rewritten
Diffstat (limited to 'src/third_party/wiredtiger/src/block/block_compact.c')
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_compact.c | 154 |
1 file changed, 139 insertions, 15 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c index f6be40cfa78..28076e856b4 100644 --- a/src/third_party/wiredtiger/src/block/block_compact.c +++ b/src/third_party/wiredtiger/src/block/block_compact.c @@ -24,9 +24,10 @@ __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) /* Reset the compaction state information. */ block->compact_pct_tenths = 0; + block->compact_blocks_moved = 0; + block->compact_cache_pages_dealt = 0; block->compact_pages_reviewed = 0; block->compact_pages_skipped = 0; - block->compact_pages_written = 0; return (0); } @@ -51,6 +52,32 @@ __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) } /* + * __wt_block_compact_progress -- + * Output compact progress message. + */ +void +__wt_block_compact_progress(WT_SESSION_IMPL *session, WT_BLOCK *block, u_int *msg_countp) +{ + struct timespec cur_time; + uint64_t time_diff; + + if (!WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS)) + return; + + __wt_epoch(session, &cur_time); + + /* Log one progress message every twenty seconds. */ + time_diff = WT_TIMEDIFF_SEC(cur_time, session->compact->begin); + if (time_diff / WT_PROGRESS_MSG_PERIOD > *msg_countp) { + ++*msg_countp; + __wt_verbose(session, WT_VERB_COMPACT_PROGRESS, + " compacting %s for %" PRIu64 " seconds; reviewed %" PRIu64 " pages, skipped %" PRIu64 + " pages, cache pages evicted %" PRIu64 ", on-disk pages moved %" PRIu64, + block->name, time_diff, block->compact_pages_reviewed, block->compact_pages_skipped, + block->compact_cache_pages_dealt, block->compact_blocks_moved); + } +} +/* * __wt_block_compact_skip -- * Return if compaction will shrink the file. 
*/ @@ -117,7 +144,7 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) "%s: total reviewed %" PRIu64 " pages, total skipped %" PRIu64 " pages, total wrote %" PRIu64 " pages", block->name, block->compact_pages_reviewed, block->compact_pages_skipped, - block->compact_pages_written); + block->compact_cache_pages_dealt); __wt_verbose(session, WT_VERB_COMPACT, "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first 80%% of the file", block->name, (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty); @@ -136,27 +163,22 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) } /* - * __wt_block_compact_page_skip -- + * __compact_page_skip -- * Return if writing a particular page will shrink the file. */ -int -__wt_block_compact_page_skip( - WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp) +static void +__compact_page_skip( + WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, uint32_t size, bool *skipp) { WT_EXT *ext; WT_EXTLIST *el; - wt_off_t limit, offset; - uint32_t checksum, objectid, size; + wt_off_t limit; *skipp = true; /* Return a default skip. */ - /* Crack the cookie. */ - WT_RET(__wt_block_addr_unpack( - session, block, addr, addr_size, &objectid, &offset, &size, &checksum)); - /* * If this block is in the chosen percentage of the file and there's a block on the available - * list that's appears before that percentage of the file, rewrite the block. Checking the + * list that appears before that percentage of the file, rewrite the block. Checking the * available list is necessary (otherwise writing the block would extend the file), but there's * an obvious race if the file is sufficiently busy. */ @@ -174,17 +196,117 @@ __wt_block_compact_page_skip( } } __wt_spin_unlock(session, &block->live_lock); +} + +/* + * __wt_block_compact_page_skip -- + * Return if writing a particular page will shrink the file. 
+ */ +int +__wt_block_compact_page_skip( + WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp) +{ + wt_off_t offset; + uint32_t size, checksum, objectid; + + WT_UNUSED(addr_size); + *skipp = true; /* Return a default skip. */ + offset = 0; + + /* Crack the cookie. */ + WT_RET(__wt_block_addr_unpack( + session, block, addr, addr_size, &objectid, &offset, &size, &checksum)); + + __compact_page_skip(session, block, offset, size, skipp); ++block->compact_pages_reviewed; if (*skipp) ++block->compact_pages_skipped; else - ++block->compact_pages_written; + ++block->compact_cache_pages_dealt; return (0); } /* + * __wt_block_compact_page_rewrite -- + * Rewrite a page if it will shrink the file. + */ +int +__wt_block_compact_page_rewrite( + WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *skipp) +{ + WT_DECL_ITEM(tmp); + WT_DECL_RET; + wt_off_t offset, new_offset; + uint32_t size, checksum, objectid; + uint8_t *endp; + bool discard_block; + + *skipp = true; /* Return a default skip. */ + new_offset = 0; /* -Werror=maybe-uninitialized */ + + discard_block = false; + + WT_ERR(__wt_block_addr_unpack( + session, block, addr, *addr_sizep, &objectid, &offset, &size, &checksum)); + + /* Check if the block is worth rewriting. */ + __compact_page_skip(session, block, offset, size, skipp); + + if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT) || + WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS)) { + ++block->compact_pages_reviewed; + if (*skipp) + ++block->compact_pages_skipped; + else + ++block->compact_blocks_moved; + } + if (*skipp) + return (0); + + /* Read the block. */ + WT_ERR(__wt_scr_alloc(session, size, &tmp)); + WT_ERR(__wt_read(session, block->fh, offset, size, tmp->mem)); + + /* Allocate a replacement block. 
*/ + WT_ERR(__wt_block_ext_prealloc(session, 5)); + __wt_spin_lock(session, &block->live_lock); + ret = __wt_block_alloc(session, block, &new_offset, (wt_off_t)size); + __wt_spin_unlock(session, &block->live_lock); + WT_ERR(ret); + discard_block = true; + + /* Write the block. */ + WT_ERR(__wt_write(session, block->fh, new_offset, size, tmp->mem)); + + /* Free the original block. */ + __wt_spin_lock(session, &block->live_lock); + ret = __wt_block_off_free(session, block, objectid, offset, (wt_off_t)size); + __wt_spin_unlock(session, &block->live_lock); + WT_ERR(ret); + + /* Build the returned address cookie. */ + endp = addr; + WT_ERR(__wt_block_addr_pack(block, &endp, objectid, new_offset, size, checksum)); + *addr_sizep = WT_PTRDIFF(endp, addr); + + WT_STAT_CONN_INCR(session, block_write); + WT_STAT_CONN_INCRV(session, block_byte_write, size); + + discard_block = false; + +err: + if (discard_block) { + __wt_spin_lock(session, &block->live_lock); + WT_TRET(__wt_block_off_free(session, block, objectid, new_offset, (wt_off_t)size)); + __wt_spin_unlock(session, &block->live_lock); + } + __wt_scr_free(session, &tmp); + return (ret); +} + +/* * __block_dump_bucket_stat -- * Dump out the information about available and used blocks in the given bucket (part of the * file). @@ -237,8 +359,10 @@ __block_dump_file_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start) session, WT_VERB_COMPACT, "pages reviewed: %" PRIu64, block->compact_pages_reviewed); __wt_verbose( session, WT_VERB_COMPACT, "pages skipped: %" PRIu64, block->compact_pages_skipped); + __wt_verbose(session, WT_VERB_COMPACT, + "cache pages read/flushed out of the cache: %" PRIu64, block->compact_cache_pages_dealt); __wt_verbose( - session, WT_VERB_COMPACT, "pages written: %" PRIu64, block->compact_pages_written); + session, WT_VERB_COMPACT, "blocks moved : %" PRIu64, block->compact_blocks_moved); } __wt_verbose(session, WT_VERB_COMPACT, |