summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYingxin <yingxin.cheng@intel.com>2022-01-24 09:37:01 +0800
committerGitHub <noreply@github.com>2022-01-24 09:37:01 +0800
commit375503e5df73a306b6911ccff2f0296d02125ea6 (patch)
tree5b702883ebe7cac34c4abb7d7b468e1606c413c2
parent0f448714c2493b0bfd798dba6ef7bdb0f08282e2 (diff)
parent7c23739c96b79dda4e29f8e9cc49a38d3d1cda4d (diff)
downloadceph-master.tar.gz
Merge pull request #44649 from cyx1231st/wip-crimson-improve-log-cachemaster
crimson/os/seastore: consolidate seastore_t and seastore_cache logs Reviewed-by: Samuel Just <sjust@redhat.com>
-rw-r--r--src/common/subsys.h1
-rw-r--r--src/crimson/os/seastore/cache.cc283
-rw-r--r--src/crimson/os/seastore/cache.h233
-rw-r--r--src/crimson/os/seastore/journal.cc4
-rw-r--r--src/crimson/os/seastore/lba_manager.h4
-rw-r--r--src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc17
-rw-r--r--src/crimson/os/seastore/lba_manager/btree/lba_btree.cc21
-rw-r--r--src/crimson/os/seastore/lba_manager/btree/lba_btree.h4
-rw-r--r--src/crimson/os/seastore/logging.h8
-rw-r--r--src/crimson/os/seastore/transaction.h55
-rw-r--r--src/crimson/os/seastore/transaction_manager.cc32
11 files changed, 392 insertions, 270 deletions
diff --git a/src/common/subsys.h b/src/common/subsys.h
index 0667bbd79af..603a3be9c06 100644
--- a/src/common/subsys.h
+++ b/src/common/subsys.h
@@ -88,6 +88,7 @@ SUBSYS(seastore_onode, 0, 5)
SUBSYS(seastore_odata, 0, 5)
SUBSYS(seastore_omap, 0, 5)
SUBSYS(seastore_tm, 0, 5) // logs below seastore tm
+SUBSYS(seastore_t, 0, 5)
SUBSYS(seastore_cleaner, 0, 5)
SUBSYS(seastore_lba, 0, 5)
SUBSYS(seastore_cache, 0, 5)
diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc
index 4b7ab5495af..39ac510b96b 100644
--- a/src/crimson/os/seastore/cache.cc
+++ b/src/crimson/os/seastore/cache.cc
@@ -30,6 +30,8 @@ Cache::Cache(
lru(crimson::common::get_conf<Option::size_t>(
"seastore_cache_lru_size"))
{
+ LOG_PREFIX(Cache::Cache);
+ INFO("created, lru_size={}", lru.get_capacity());
register_metrics();
}
@@ -37,7 +39,7 @@ Cache::~Cache()
{
LOG_PREFIX(Cache::~Cache);
for (auto &i: extents) {
- ERROR("extent {} still alive", i);
+ ERROR("extent is still alive -- {}", i);
}
ceph_assert(extents.empty());
}
@@ -45,17 +47,19 @@ Cache::~Cache()
Cache::retire_extent_ret Cache::retire_extent_addr(
Transaction &t, paddr_t addr, extent_len_t length)
{
+ LOG_PREFIX(Cache::retire_extent_addr);
+ TRACET("retire {}~{}", t, addr, length);
+
assert(addr.is_real() && !addr.is_block_relative());
- LOG_PREFIX(Cache::retire_extent);
CachedExtentRef ext;
auto result = t.get_extent(addr, &ext);
if (result == Transaction::get_extent_ret::PRESENT) {
- DEBUGT("found {} in t", t, addr);
+ DEBUGT("retire {}~{} on t -- {}", t, addr, length, *ext);
t.add_to_retired_set(CachedExtentRef(&*ext));
return retire_extent_iertr::now();
} else if (result == Transaction::get_extent_ret::RETIRED) {
- ERRORT("{} is already retired", t, addr);
+ ERRORT("retire {}~{} failed, already retired -- {}", t, addr, length, *ext);
ceph_abort();
}
@@ -66,6 +70,7 @@ Cache::retire_extent_ret Cache::retire_extent_addr(
// retiring is not included by the cache hit metrics
ext = query_cache(addr, nullptr);
if (ext) {
+ DEBUGT("retire {}~{} in cache -- {}", t, addr, length, *ext);
if (ext->get_type() != extent_types_t::RETIRED_PLACEHOLDER) {
t.add_to_read_set(ext);
return trans_intr::make_interruptible(
@@ -77,6 +82,7 @@ Cache::retire_extent_ret Cache::retire_extent_addr(
}
// the retired-placeholder exists
} else {
+ DEBUGT("retire {}~{} as placeholder", t, addr, length);
// add a new placeholder to Cache
ext = CachedExtent::make_cached_extent_ref<
RetiredExtentPlaceholder>(length);
@@ -103,6 +109,9 @@ void Cache::dump_contents()
void Cache::register_metrics()
{
+ LOG_PREFIX(Cache::register_metrics);
+ DEBUG("");
+
stats = {};
namespace sm = seastar::metrics;
@@ -212,7 +221,7 @@ void Cache::register_metrics()
{
sm::make_counter(
"invalidated_extents",
- efforts.read.extents,
+ efforts.read.num,
sm::description("extents of invalidated transactions"),
{src_label, read_effort_label}
),
@@ -229,7 +238,7 @@ void Cache::register_metrics()
// non READ invalidated efforts
for (auto& effort_name : invalidated_effort_names) {
- auto& effort = [&effort_name, &efforts]() -> effort_t& {
+ auto& effort = [&effort_name, &efforts]() -> io_stat_t& {
if (effort_name == "READ") {
return efforts.read;
} else if (effort_name == "MUTATE") {
@@ -248,7 +257,7 @@ void Cache::register_metrics()
{
sm::make_counter(
"invalidated_extents",
- effort.extents,
+ effort.num,
sm::description("extents of invalidated transactions"),
{src_label, effort_label(effort_name)}
),
@@ -346,7 +355,7 @@ void Cache::register_metrics()
);
for (auto& effort_name : committed_effort_names) {
auto& effort_by_ext = [&efforts, &effort_name]()
- -> counter_by_extent_t<effort_t>& {
+ -> counter_by_extent_t<io_stat_t>& {
if (effort_name == "READ") {
return efforts.read_by_ext;
} else if (effort_name == "MUTATE") {
@@ -369,7 +378,7 @@ void Cache::register_metrics()
{
sm::make_counter(
"committed_extents",
- effort.extents,
+ effort.num,
sm::description("extents of committed transactions"),
{src_label, effort_label(effort_name), ext_label}
),
@@ -412,7 +421,7 @@ void Cache::register_metrics()
),
sm::make_counter(
"successful_read_extents",
- stats.success_read_efforts.read.extents,
+ stats.success_read_efforts.read.num,
sm::description("extents of successful read transactions")
),
sm::make_counter(
@@ -614,20 +623,18 @@ void Cache::register_metrics()
void Cache::add_extent(CachedExtentRef ref)
{
LOG_PREFIX(Cache::add_extent);
+ TRACE("extent -- {}", *ref);
assert(ref->is_valid());
extents.insert(*ref);
-
if (ref->is_dirty()) {
add_to_dirty(ref);
} else {
touch_extent(*ref);
}
- DEBUG("extent {}", *ref);
}
void Cache::mark_dirty(CachedExtentRef ref)
{
- LOG_PREFIX(Cache::mark_dirty);
if (ref->is_dirty()) {
assert(ref->primary_ref_list_hook.is_linked());
return;
@@ -636,8 +643,6 @@ void Cache::mark_dirty(CachedExtentRef ref)
lru.remove_from_lru(*ref);
add_to_dirty(ref);
ref->state = CachedExtent::extent_state_t::DIRTY;
-
- DEBUG("extent: {}", *ref);
}
void Cache::add_to_dirty(CachedExtentRef ref)
@@ -664,7 +669,7 @@ void Cache::remove_from_dirty(CachedExtentRef ref)
void Cache::remove_extent(CachedExtentRef ref)
{
LOG_PREFIX(Cache::remove_extent);
- DEBUG("extent {}", *ref);
+ TRACE("extent -- {}", *ref);
assert(ref->is_valid());
if (ref->is_dirty()) {
remove_from_dirty(ref);
@@ -678,20 +683,10 @@ void Cache::commit_retire_extent(
Transaction& t,
CachedExtentRef ref)
{
- LOG_PREFIX(Cache::commit_retire_extent);
- DEBUGT("extent {}", t, *ref);
- assert(ref->is_valid());
+ remove_extent(ref);
- // TODO: why does this duplicate remove_extent?
- if (ref->is_dirty()) {
- remove_from_dirty(ref);
- } else {
- lru.remove_from_lru(*ref);
- }
ref->dirty_from_or_retired_at = JOURNAL_SEQ_MAX;
-
invalidate_extent(t, *ref);
- extents.erase(*ref);
}
void Cache::commit_replace_extent(
@@ -699,8 +694,6 @@ void Cache::commit_replace_extent(
CachedExtentRef next,
CachedExtentRef prev)
{
- LOG_PREFIX(Cache::commit_replace_extent);
- DEBUGT("prev {}, next {}", t, *prev, *next);
assert(next->get_paddr() == prev->get_paddr());
assert(next->version == prev->version + 1);
extents.replace(*next, *prev);
@@ -734,16 +727,19 @@ void Cache::invalidate_extent(
Transaction& t,
CachedExtent& extent)
{
- LOG_PREFIX(Cache::invalidate);
- DEBUGT("conflict begin -- extent {}", t, extent);
+ LOG_PREFIX(Cache::invalidate_extent);
+ bool do_conflict_log = true;
for (auto &&i: extent.transactions) {
if (!i.t->conflicted) {
+ if (do_conflict_log) {
+ SUBDEBUGT(seastore_t, "conflict begin -- {}", t, extent);
+ do_conflict_log = false;
+ }
assert(!i.t->is_weak());
account_conflict(t.get_src(), i.t->get_src());
mark_transaction_conflicted(*i.t, extent);
}
}
- DEBUGT("conflict end", t);
extent.state = CachedExtent::extent_state_t::INVALID;
}
@@ -751,8 +747,8 @@ void Cache::mark_transaction_conflicted(
Transaction& t, CachedExtent& conflicting_extent)
{
LOG_PREFIX(Cache::mark_transaction_conflicted);
+ SUBTRACET(seastore_t, "", t);
assert(!t.conflicted);
- DEBUGT("set conflict", t);
t.conflicted = true;
auto& efforts = get_by_src(stats.invalidated_efforts_by_src,
@@ -762,36 +758,37 @@ void Cache::mark_transaction_conflicted(
conflicting_extent.get_type());
++counter;
- efforts.read.extents += t.read_set.size();
+ io_stat_t read_stat;
for (auto &i: t.read_set) {
- efforts.read.bytes += i.ref->get_length();
+ read_stat.increment(i.ref->get_length());
}
+ efforts.read.increment_stat(read_stat);
if (t.get_src() != Transaction::src_t::READ) {
- efforts.retire.extents += t.retired_set.size();
+ io_stat_t retire_stat;
for (auto &i: t.retired_set) {
- efforts.retire.bytes += i->get_length();
+ retire_stat.increment(i->get_length());
}
+ efforts.retire.increment_stat(retire_stat);
- auto& fresh_stats = t.get_fresh_block_stats();
- efforts.fresh.extents += fresh_stats.num;
- efforts.fresh.bytes += fresh_stats.bytes;
+ auto& fresh_stat = t.get_fresh_block_stats();
+ efforts.fresh.increment_stat(fresh_stat);
+ io_stat_t delta_stat;
for (auto &i: t.mutated_block_list) {
if (!i->is_valid()) {
continue;
}
- DEBUGT("was mutating extent: {}", t, *i);
efforts.mutate.increment(i->get_length());
- efforts.mutate_delta_bytes += i->get_delta().length();
+ delta_stat.increment(i->get_delta().length());
}
+ efforts.mutate_delta_bytes += delta_stat.bytes;
auto& ool_stats = t.get_ool_write_stats();
- efforts.fresh_ool_written.extents += ool_stats.extents.num;
- efforts.fresh_ool_written.bytes += ool_stats.extents.bytes;
+ efforts.fresh_ool_written.increment_stat(ool_stats.extents);
efforts.num_ool_records += ool_stats.num_records;
- efforts.ool_record_bytes +=
- (ool_stats.header_bytes + ool_stats.data_bytes);
+ auto ool_record_bytes = (ool_stats.header_bytes + ool_stats.data_bytes);
+ efforts.ool_record_bytes += ool_record_bytes;
// Note: we only account overhead from committed ool records
if (t.get_src() == Transaction::src_t::CLEANER_TRIM ||
@@ -805,6 +802,16 @@ void Cache::mark_transaction_conflicted(
get_by_src(stats.invalidated_lba_tree_efforts, t.get_src()
).increment(t.lba_tree_stats);
+
+ SUBDEBUGT(seastore_t,
+ "discard {} read, {} fresh, {} delta, {} retire, {}({}B) ool-records",
+ t,
+ read_stat,
+ fresh_stat,
+ delta_stat,
+ retire_stat,
+ ool_stats.num_records,
+ ool_record_bytes);
} else {
// read transaction won't have non-read efforts
assert(t.retired_set.empty());
@@ -813,26 +820,31 @@ void Cache::mark_transaction_conflicted(
assert(t.get_ool_write_stats().is_clear());
assert(t.onode_tree_stats.is_clear());
assert(t.lba_tree_stats.is_clear());
+ SUBDEBUGT(seastore_t, "discard {} read", t, read_stat);
}
}
void Cache::on_transaction_destruct(Transaction& t)
{
LOG_PREFIX(Cache::on_transaction_destruct);
+ SUBTRACET(seastore_t, "", t);
if (t.get_src() == Transaction::src_t::READ &&
- t.conflicted == false &&
- !t.is_weak()) {
- DEBUGT("read is successful", t);
- ++stats.success_read_efforts.num_trans;
-
- auto& effort = stats.success_read_efforts.read;
- effort.extents += t.read_set.size();
+ t.conflicted == false) {
+ io_stat_t read_stat;
for (auto &i: t.read_set) {
- effort.bytes += i.ref->get_length();
+ read_stat.increment(i.ref->get_length());
}
+ SUBDEBUGT(seastore_t, "done {} read", t, read_stat);
+
+ if (!t.is_weak()) {
+ // exclude weak transaction as it is impossible to conflict
+ ++stats.success_read_efforts.num_trans;
+ stats.success_read_efforts.read.increment_stat(read_stat);
+ }
+
// read transaction won't have non-read efforts
assert(t.retired_set.empty());
- assert(t.get_fresh_block_stats().num == 0);
+ assert(t.get_fresh_block_stats().is_clear());
assert(t.mutated_block_list.empty());
assert(t.onode_tree_stats.is_clear());
assert(t.lba_tree_stats.is_clear());
@@ -846,9 +858,12 @@ CachedExtentRef Cache::alloc_new_extent_by_type(
bool delay ///< [in] whether to delay paddr alloc
)
{
+ LOG_PREFIX(Cache::alloc_new_extent_by_type);
+ SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}",
+ t, type, length, delay);
switch (type) {
case extent_types_t::ROOT:
- assert(0 == "ROOT is never directly alloc'd");
+ ceph_assert(0 == "ROOT is never directly alloc'd");
return CachedExtentRef();
case extent_types_t::LADDR_INTERNAL:
return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, delay);
@@ -906,7 +921,7 @@ CachedExtentRef Cache::duplicate_for_write(
record_t Cache::prepare_record(Transaction &t)
{
LOG_PREFIX(Cache::prepare_record);
- DEBUGT("enter", t);
+ SUBTRACET(seastore_t, "enter", t);
auto trans_src = t.get_src();
assert(!t.is_weak());
@@ -916,33 +931,38 @@ record_t Cache::prepare_record(Transaction &t)
trans_src);
// Should be valid due to interruptible future
+ io_stat_t read_stat;
for (auto &i: t.read_set) {
if (!i.ref->is_valid()) {
- DEBUGT("read_set invalid extent: {}, aborting", t, *i.ref);
+ SUBERRORT(seastore_t,
+ "read_set got invalid extent, aborting -- {}", t, *i.ref);
ceph_abort("no invalid extent allowed in transactions' read_set");
}
get_by_ext(efforts.read_by_ext,
i.ref->get_type()).increment(i.ref->get_length());
+ read_stat.increment(i.ref->get_length());
}
- DEBUGT("read_set validated", t);
t.read_set.clear();
+ t.write_set.clear();
record_t record;
- t.write_set.clear();
-
// Add new copy of mutated blocks, set_io_wait to block until written
record.deltas.reserve(t.mutated_block_list.size());
+ io_stat_t delta_stat;
for (auto &i: t.mutated_block_list) {
if (!i->is_valid()) {
- DEBUGT("ignoring invalid {}", t, *i);
+ DEBUGT("invalid mutated extent -- {}", t, *i);
continue;
}
- DEBUGT("mutating {}", t, *i);
+ assert(i->prior_instance);
+ auto delta_bl = i->get_delta();
+ auto delta_length = delta_bl.length();
+ DEBUGT("mutated extent with {}B delta -- {}, prior={}",
+ t, delta_length, *i, *i->prior_instance);
get_by_ext(efforts.mutate_by_ext,
i->get_type()).increment(i->get_length());
- assert(i->prior_instance);
commit_replace_extent(t, i, i->prior_instance);
i->prepare_write();
@@ -951,8 +971,10 @@ record_t Cache::prepare_record(Transaction &t)
assert(i->get_version() > 0);
auto final_crc = i->get_crc32c();
if (i->get_type() == extent_types_t::ROOT) {
+ SUBTRACET(seastore_t, "writing out root delta {}B -- {}",
+ t, delta_length, *i);
+ assert(t.root == i);
root = t.root;
- DEBUGT("writing out root delta for {}", t, *t.root);
record.push_back(
delta_info_t{
extent_types_t::ROOT,
@@ -962,7 +984,7 @@ record_t Cache::prepare_record(Transaction &t)
0,
0,
t.root->get_version() - 1,
- t.root->get_delta()
+ std::move(delta_bl)
});
} else {
record.push_back(
@@ -976,22 +998,24 @@ record_t Cache::prepare_record(Transaction &t)
final_crc,
(seastore_off_t)i->get_length(),
i->get_version() - 1,
- i->get_delta()
+ std::move(delta_bl)
});
i->last_committed_crc = final_crc;
}
- auto delta_length = record.deltas.back().bl.length();
assert(delta_length);
get_by_ext(efforts.delta_bytes_by_ext,
i->get_type()) += delta_length;
+ delta_stat.increment(delta_length);
}
// Transaction is now a go, set up in-memory cache state
// invalidate now invalid blocks
+ io_stat_t retire_stat;
for (auto &i: t.retired_set) {
- DEBUGT("retiring {}", t, *i);
+ DEBUGT("retired extent -- {}", t, *i);
get_by_ext(efforts.retire_by_ext,
i->get_type()).increment(i->get_length());
+ retire_stat.increment(i->get_length());
commit_retire_extent(t, i);
if (i->backend_type == device_type_t::RANDOM_BLOCK) {
paddr_t paddr = i->get_paddr();
@@ -1003,14 +1027,18 @@ record_t Cache::prepare_record(Transaction &t)
}
record.extents.reserve(t.inline_block_list.size());
+ io_stat_t fresh_stat;
+ io_stat_t fresh_invalid_stat;
for (auto &i: t.inline_block_list) {
if (!i->is_valid()) {
- DEBUGT("fresh inline block (invalid) {}", t, *i);
+ DEBUGT("invalid fresh inline extent -- {}", t, *i);
+ fresh_invalid_stat.increment(i->get_length());
get_by_ext(efforts.fresh_invalid_by_ext,
i->get_type()).increment(i->get_length());
} else {
- DEBUGT("fresh inline block {}", t, *i);
+ DEBUGT("fresh inline extent -- {}", t, *i);
}
+ fresh_stat.increment(i->get_length());
get_by_ext(efforts.fresh_inline_by_ext,
i->get_type()).increment(i->get_length());
assert(i->is_inline());
@@ -1019,7 +1047,7 @@ record_t Cache::prepare_record(Transaction &t)
i->prepare_write();
bl.append(i->get_bptr());
if (i->get_type() == extent_types_t::ROOT) {
- assert(0 == "ROOT never gets written as a fresh block");
+ ceph_assert(0 == "ROOT never gets written as a fresh block");
}
assert(bl.length() == i->get_length());
@@ -1045,7 +1073,7 @@ record_t Cache::prepare_record(Transaction &t)
for (auto &i: t.ool_block_list) {
ceph_assert(i->is_valid());
- DEBUGT("fresh ool block {}", t, *i);
+ DEBUGT("fresh ool extent -- {}", t, *i);
get_by_ext(efforts.fresh_ool_by_ext,
i->get_type()).increment(i->get_length());
}
@@ -1059,23 +1087,29 @@ record_t Cache::prepare_record(Transaction &t)
ceph_assert(ool_stats.extents.num == t.ool_block_list.size());
if (record.is_empty()) {
- // XXX: improve osd logic to not submit empty transactions.
- DEBUGT("record to submit is empty, src={}!", t, trans_src);
+ SUBINFOT(seastore_t,
+ "record to submit is empty, src={}", t, trans_src);
assert(t.onode_tree_stats.is_clear());
assert(t.lba_tree_stats.is_clear());
assert(ool_stats.is_clear());
}
- DEBUGT("record is ready to submit, src={}, mdsize={}, dsize={}; "
- "{} ool records, mdsize={}, dsize={}, fillness={}",
- t, trans_src,
- record.size.get_raw_mdlength(),
- record.size.dlength,
- ool_stats.num_records,
- ool_stats.header_raw_bytes,
- ool_stats.data_bytes,
- ((double)(ool_stats.header_raw_bytes + ool_stats.data_bytes) /
- (ool_stats.header_bytes + ool_stats.data_bytes)));
+ SUBDEBUGT(seastore_t,
+ "commit H{} {} read, {} fresh with {} invalid, {} delta, {} retire, "
+ "{}(md={}B, data={}B, fill={}) ool-records, {}B md, {}B data",
+ t, (void*)&t.get_handle(),
+ read_stat,
+ fresh_stat,
+ fresh_invalid_stat,
+ delta_stat,
+ retire_stat,
+ ool_stats.num_records,
+ ool_stats.header_raw_bytes,
+ ool_stats.data_bytes,
+ ((double)(ool_stats.header_raw_bytes + ool_stats.data_bytes) /
+ (ool_stats.header_bytes + ool_stats.data_bytes)),
+ record.size.get_raw_mdlength(),
+ record.size.dlength);
if (trans_src == Transaction::src_t::CLEANER_TRIM ||
trans_src == Transaction::src_t::CLEANER_RECLAIM) {
// CLEANER transaction won't contain any onode tree operations
@@ -1113,7 +1147,8 @@ void Cache::complete_commit(
SegmentCleaner *cleaner)
{
LOG_PREFIX(Cache::complete_commit);
- DEBUGT("enter", t);
+ SUBTRACET(seastore_t, "final_block_start={}, seq={}",
+ t, final_block_start, seq);
t.for_each_fresh_block([&](auto &i) {
if (i->is_inline()) {
@@ -1124,7 +1159,6 @@ void Cache::complete_commit(
if (i->is_valid()) {
i->state = CachedExtent::extent_state_t::CLEAN;
- DEBUGT("fresh {}", t, *i);
add_extent(i);
if (cleaner) {
cleaner->mark_space_used(
@@ -1139,7 +1173,6 @@ void Cache::complete_commit(
if (!i->is_valid()) {
continue;
}
- DEBUGT("mutated {}", t, *i);
assert(i->prior_instance);
i->on_delta_write(final_block_start);
i->prior_instance = CachedExtentRef();
@@ -1166,12 +1199,14 @@ void Cache::complete_commit(
last_commit = seq;
for (auto &i: t.retired_set) {
- DEBUGT("retiring {}", t, *i);
i->dirty_from_or_retired_at = last_commit;
}
}
-void Cache::init() {
+void Cache::init()
+{
+ LOG_PREFIX(Cache::init);
+ INFO("init root");
if (root) {
// initial creation will do mkfs followed by mount each of which calls init
remove_extent(root);
@@ -1184,6 +1219,8 @@ void Cache::init() {
Cache::mkfs_iertr::future<> Cache::mkfs(Transaction &t)
{
+ LOG_PREFIX(Cache::mkfs);
+ INFOT("create root", t);
return get_root(t).si_then([this, &t](auto croot) {
duplicate_for_write(t, croot);
return mkfs_iertr::now();
@@ -1197,6 +1234,15 @@ Cache::mkfs_iertr::future<> Cache::mkfs(Transaction &t)
Cache::close_ertr::future<> Cache::close()
{
+ LOG_PREFIX(Cache::close);
+ INFO("close with {}({}B) dirty from {}, {}({}B) lru, totally {}({}B) indexed extents",
+ dirty.size(),
+ stats.dirty_bytes,
+ get_oldest_dirty_from().value_or(journal_seq_t{}),
+ lru.get_current_contents_extents(),
+ lru.get_current_contents_bytes(),
+ extents.size(),
+ extents.get_bytes());
root.reset();
for (auto i = dirty.begin(); i != dirty.end(); ) {
auto ptr = &*i;
@@ -1205,7 +1251,7 @@ Cache::close_ertr::future<> Cache::close()
intrusive_ptr_release(ptr);
}
assert(stats.dirty_bytes == 0);
- clear_lru();
+ lru.clear();
return close_ertr::now();
}
@@ -1217,7 +1263,7 @@ Cache::replay_delta(
{
LOG_PREFIX(Cache::replay_delta);
if (delta.type == extent_types_t::ROOT) {
- DEBUG("found root delta");
+ DEBUG("replay root delta {} at {} {}", delta, journal_seq, record_base);
remove_extent(root);
root->apply_delta_and_adjust_crc(record_base, delta.bl);
root->dirty_from_or_retired_at = journal_seq;
@@ -1259,14 +1305,14 @@ Cache::replay_delta(
);
return extent_fut.safe_then([=, &delta](auto extent) {
if (!extent) {
+ DEBUG("replay extent is not present, so delta is obsolete {} at {} {}",
+ delta, journal_seq, record_base);
assert(delta.pversion > 0);
- DEBUG(
- "replaying {}, extent not present so delta is obsolete",
- delta);
return;
}
- DEBUG("replaying {} on {}", *extent, delta);
+ DEBUG("replay extent delta {} at {} {} -- {} ...",
+ delta, journal_seq, record_base, *extent);
assert(extent->version == delta.pversion);
@@ -1289,21 +1335,31 @@ Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents(
size_t max_bytes)
{
LOG_PREFIX(Cache::get_next_dirty_extents);
+ if (dirty.empty()) {
+ DEBUGT("max_bytes={}B, seq={}, dirty is empty",
+ t, max_bytes, seq);
+ } else {
+ DEBUGT("max_bytes={}B, seq={}, dirty_from={}",
+ t, max_bytes, seq, dirty.begin()->get_dirty_from());
+ }
std::vector<CachedExtentRef> cand;
size_t bytes_so_far = 0;
for (auto i = dirty.begin();
i != dirty.end() && bytes_so_far < max_bytes;
++i) {
- if (i->get_dirty_from() != journal_seq_t() && i->get_dirty_from() < seq) {
- DEBUGT("next {}", t, *i);
- if (!(cand.empty() ||
- cand.back()->get_dirty_from() <= i->get_dirty_from())) {
- ERRORT("last {}, next {}", t, *cand.back(), *i);
+ auto dirty_from = i->get_dirty_from();
+ ceph_assert(dirty_from != journal_seq_t() &&
+ dirty_from != JOURNAL_SEQ_MAX &&
+ dirty_from != NO_DELTAS);
+ if (dirty_from < seq) {
+ TRACET("next extent -- {}", t, *i);
+ if (!cand.empty() && cand.back()->get_dirty_from() > dirty_from) {
+ ERRORT("dirty extents are not ordered by dirty_from -- last={}, next={}",
+ t, *cand.back(), *i);
+ ceph_abort();
}
- assert(cand.empty() || cand.back()->get_dirty_from() <= i->get_dirty_from());
bytes_so_far += i->get_length();
cand.push_back(&*i);
-
} else {
break;
}
@@ -1315,8 +1371,7 @@ Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents(
return trans_intr::do_for_each(
cand,
[FNAME, this, &t, &ret](auto &ext) {
- DEBUG("waiting on {}", *ext);
-
+ TRACET("waiting on extent -- {}", t, *ext);
return trans_intr::make_interruptible(
ext->wait_io()
).then_interruptible([FNAME, this, ext, &t, &ret] {
@@ -1329,12 +1384,12 @@ Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents(
CachedExtentRef on_transaction;
auto result = t.get_extent(ext->get_paddr(), &on_transaction);
if (result == Transaction::get_extent_ret::ABSENT) {
- DEBUGT("{} absent on t", t, *ext);
+ DEBUGT("extent is absent on t -- {}", t, *ext);
t.add_to_read_set(ext);
if (ext->get_type() == extent_types_t::ROOT) {
if (t.root) {
assert(&*t.root == &*ext);
- assert(0 == "t.root would have to already be in the read set");
+ ceph_assert(0 == "t.root would have to already be in the read set");
} else {
assert(&*ext == &*root);
t.root = root;
@@ -1342,11 +1397,11 @@ Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents(
}
ret.push_back(ext);
} else if (result == Transaction::get_extent_ret::PRESENT) {
- DEBUGT("{} present on t as {}", t, *ext, *on_transaction);
+ DEBUGT("extent is present on t -- {}, on t {}", t, *ext, *on_transaction);
ret.push_back(on_transaction);
} else {
assert(result == Transaction::get_extent_ret::RETIRED);
- DEBUGT("{} retired on t", t, *ext);
+ DEBUGT("extent is retired on t -- {}", t, *ext);
}
});
}).then_interruptible([&ret] {
@@ -1359,16 +1414,16 @@ Cache::get_root_ret Cache::get_root(Transaction &t)
{
LOG_PREFIX(Cache::get_root);
if (t.root) {
- DEBUGT("root already on transaction {}", t, *t.root);
+ TRACET("root already on t -- {}", t, *t.root);
return get_root_iertr::make_ready_future<RootBlockRef>(
t.root);
} else {
auto ret = root;
- DEBUGT("waiting root {}", t, *ret);
+ DEBUGT("root not on t -- {}", t, *ret);
return ret->wait_io().then([this, FNAME, ret, &t] {
- DEBUGT("got root read {}", t, *ret);
+ TRACET("root wait io done -- {}", t, *ret);
if (!ret->is_valid()) {
- DEBUGT("root became invalid: {}", t, *ret);
+ DEBUGT("root is invalid -- {}", t, *ret);
++(get_by_src(stats.trans_conflicts_by_unknown, t.get_src()));
mark_transaction_conflicted(t, *ret);
return get_root_iertr::make_ready_future<RootBlockRef>();
@@ -1400,7 +1455,7 @@ Cache::get_extent_ertr::future<CachedExtentRef> Cache::_get_extent_by_type(
switch (type) {
case extent_types_t::ROOT:
- assert(0 == "ROOT is never directly read");
+ ceph_assert(0 == "ROOT is never directly read");
return get_extent_ertr::make_ready_future<CachedExtentRef>();
case extent_types_t::LADDR_INTERNAL:
return get_extent<lba_manager::btree::LBAInternalNode>(
diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h
index 0b4a50afed7..a63162a45a0 100644
--- a/src/crimson/os/seastore/cache.h
+++ b/src/crimson/os/seastore/cache.h
@@ -83,6 +83,15 @@ namespace crimson::os::seastore {
* CachedExtent::delta_written(paddr_t) with the address of the start
* of the record
* - Complete all promises with the final record start paddr_t
+ *
+ *
+ * Cache logs
+ *
+ * levels:
+ * - INFO: major initiation, closing operations
+ * - DEBUG: major extent related operations, INFO details
+ * - TRACE: DEBUG details
+ * - seastore_t logs
*/
class Cache {
public:
@@ -111,8 +120,8 @@ public:
return on_transaction_destruct(t);
}
);
- SUBDEBUGT(seastore_cache, "created name={}, source={}, is_weak={}",
- *ret, name, src, is_weak);
+ SUBDEBUGT(seastore_t, "created name={}, source={}, is_weak={}",
+ *ret, name, src, is_weak);
return ret;
}
@@ -120,25 +129,16 @@ public:
void reset_transaction_preserve_handle(Transaction &t) {
LOG_PREFIX(Cache::reset_transaction_preserve_handle);
if (t.did_reset()) {
+ SUBTRACET(seastore_t, "reset", t);
++(get_by_src(stats.trans_created_by_src, t.get_src()));
}
t.reset_preserve_handle(last_commit);
- SUBDEBUGT(seastore_cache, "reset", t);
- }
-
- /**
- * drop_from_cache
- *
- * Drop extent from cache. Intended for use when
- * ref refers to a logically dead extent as during
- * replay.
- */
- void drop_from_cache(CachedExtentRef ref) {
- remove_extent(ref);
}
/// Declare ref retired in t
void retire_extent(Transaction &t, CachedExtentRef ref) {
+ LOG_PREFIX(Cache::retire_extent);
+ SUBDEBUGT(seastore_cache, "retire extent -- {}", t, *ref);
t.add_to_retired_set(ref);
}
@@ -163,6 +163,8 @@ public:
* returns t.root and assume it is already present/read in t
*/
RootBlockRef get_root_fast(Transaction &t) {
+ LOG_PREFIX(Cache::get_root_fast);
+ SUBTRACET(seastore_cache, "root already on t -- {}", t, *t.root);
assert(t.root);
return t.root;
}
@@ -185,8 +187,11 @@ public:
const src_ext_t* p_metric_key, ///< [in] cache query metric key
Func &&extent_init_func ///< [in] init func for extent
) {
+ LOG_PREFIX(Cache::get_extent);
auto cached = query_cache(offset, p_metric_key);
if (!cached) {
+ SUBDEBUG(seastore_cache,
+ "{} {}~{} is absent, reading ...", T::TYPE, offset, length);
auto ret = CachedExtent::make_cached_extent_ref<T>(
alloc_cache_buf(length));
ret->set_paddr(offset);
@@ -199,6 +204,9 @@ public:
// extent PRESENT in cache
if (cached->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
+ SUBDEBUG(seastore_cache,
+ "{} {}~{} is absent(placeholder), reading ...",
+ T::TYPE, offset, length);
auto ret = CachedExtent::make_cached_extent_ref<T>(
alloc_cache_buf(length));
ret->set_paddr(offset);
@@ -216,6 +224,9 @@ public:
return read_extent<T>(
std::move(ret));
} else {
+ SUBTRACE(seastore_cache,
+ "{} {}~{} is present in cache -- {}",
+ T::TYPE, offset, length, *cached);
auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get()));
return ret->wait_io(
).then([ret=std::move(ret)]() mutable
@@ -253,11 +264,14 @@ public:
CachedExtentRef ret;
LOG_PREFIX(Cache::get_extent_if_cached);
auto result = t.get_extent(offset, &ret);
- if (result != Transaction::get_extent_ret::ABSENT) {
- // including get_extent_ret::RETIRED
- SUBDEBUGT(seastore_cache,
- "Found extent at offset {} on transaction: {}",
- t, offset, *ret);
+ if (result == Transaction::get_extent_ret::RETIRED) {
+ SUBDEBUGT(seastore_cache, "{} {} is retired on t -- {}",
+ t, type, offset, *ret);
+ return get_extent_if_cached_iertr::make_ready_future<
+ CachedExtentRef>(ret);
+ } else if (result == Transaction::get_extent_ret::PRESENT) {
+ SUBTRACET(seastore_cache, "{} {} is present on t -- {}",
+ t, type, offset, *ret);
return get_extent_if_cached_iertr::make_ready_future<
CachedExtentRef>(ret);
}
@@ -268,17 +282,15 @@ public:
if (!ret ||
// retired_placeholder is not really cached yet
ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
- SUBDEBUGT(seastore_cache,
- "No extent at offset {}, retired_placeholder: {}",
- t, offset, !!ret);
+ SUBDEBUGT(seastore_cache, "{} {} is absent{}",
+ t, type, offset, !!ret ? "(placeholder)" : "");
return get_extent_if_cached_iertr::make_ready_future<
CachedExtentRef>();
}
// present in cache and is not a retired_placeholder
- SUBDEBUGT(seastore_cache,
- "Found extent at offset {} in cache: {}",
- t, offset, *ret);
+ SUBDEBUGT(seastore_cache, "{} {} is present in cache -- {}",
+ t, type, offset, *ret);
t.add_to_read_set(ret);
touch_extent(*ret);
return ret->wait_io().then([ret] {
@@ -308,29 +320,33 @@ public:
LOG_PREFIX(Cache::get_extent);
auto result = t.get_extent(offset, &ret);
if (result != Transaction::get_extent_ret::ABSENT) {
+ SUBTRACET(seastore_cache, "{} {}~{} is {} on t -- {}",
+ t,
+ T::TYPE,
+ offset,
+ length,
+ result == Transaction::get_extent_ret::PRESENT ? "present" : "retired",
+ *ret);
assert(result != Transaction::get_extent_ret::RETIRED);
- SUBDEBUGT(seastore_cache,
- "Found extent at offset {} on transaction: {}",
- t, offset, *ret);
return seastar::make_ready_future<TCachedExtentRef<T>>(
ret->cast<T>());
} else {
+ SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...",
+ t, T::TYPE, offset, length);
auto metric_key = std::make_pair(t.get_src(), T::TYPE);
return trans_intr::make_interruptible(
get_extent<T>(
offset, length, &metric_key,
std::forward<Func>(extent_init_func))
- ).si_then([this, FNAME, offset, &t](auto ref) {
+ ).si_then([this, FNAME, offset, length, &t](auto ref) {
(void)this; // silence incorrect clang warning about capture
if (!ref->is_valid()) {
- SUBDEBUGT(seastore_cache, "got invalid extent: {}", t, ref);
+ SUBDEBUGT(seastore_cache, "{} {}~{} is invalid -- {}",
+ t, T::TYPE, offset, length, *ref);
++(get_by_src(stats.trans_conflicts_by_unknown, t.get_src()));
mark_transaction_conflicted(t, *ref);
return get_extent_iertr::make_ready_future<TCachedExtentRef<T>>();
} else {
- SUBDEBUGT(seastore_cache,
- "Read extent at offset {} in cache: {}",
- t, offset, *ref);
touch_extent(*ref);
t.add_to_read_set(ref);
return get_extent_iertr::make_ready_future<TCachedExtentRef<T>>(
@@ -347,13 +363,6 @@ public:
return get_extent<T>(t, offset, length, [](T &){});
}
-
- /**
- * get_extent_by_type
- *
- * Based on type, instantiate the correct concrete type
- * and read in the extent at location offset~length.
- */
private:
// This is a workaround std::move_only_function not being available,
// not really worth generalizing at this time.
@@ -399,14 +408,22 @@ private:
paddr_t offset,
laddr_t laddr,
seastore_off_t length,
- extent_init_func_t &&extent_init_func) {
+ extent_init_func_t &&extent_init_func
+ ) {
+ LOG_PREFIX(Cache::get_extent_by_type);
CachedExtentRef ret;
auto status = t.get_extent(offset, &ret);
if (status == Transaction::get_extent_ret::RETIRED) {
+ SUBDEBUGT(seastore_cache, "{} {}~{} {} is retired on t -- {}",
+ t, type, offset, length, laddr, *ret);
return seastar::make_ready_future<CachedExtentRef>();
} else if (status == Transaction::get_extent_ret::PRESENT) {
+ SUBTRACET(seastore_cache, "{} {}~{} {} is present on t -- {}",
+ t, type, offset, length, laddr, *ret);
return seastar::make_ready_future<CachedExtentRef>(ret);
} else {
+ SUBTRACET(seastore_cache, "{} {}~{} {} is absent on t, query cache ...",
+ t, type, offset, length, laddr);
auto src = t.get_src();
return trans_intr::make_interruptible(
_get_extent_by_type(
@@ -414,8 +431,8 @@ private:
std::move(extent_init_func))
).si_then([=, &t](CachedExtentRef ret) {
if (!ret->is_valid()) {
- LOG_PREFIX(Cache::get_extent_by_type);
- SUBDEBUGT(seastore_cache, "got invalid extent: {}", t, ret);
+ SUBDEBUGT(seastore_cache, "{} {}~{} {} is invalid -- {}",
+ t, type, offset, length, laddr, *ret);
++(get_by_src(stats.trans_conflicts_by_unknown, t.get_src()));
mark_transaction_conflicted(t, *ret.get());
return get_extent_ertr::make_ready_future<CachedExtentRef>();
@@ -430,6 +447,12 @@ private:
}
public:
+ /**
+ * get_extent_by_type
+ *
+ * Based on type, instantiate the correct concrete type
+ * and read in the extent at location offset~length.
+ */
template <typename Func>
get_extent_by_type_ret get_extent_by_type(
Transaction &t, ///< [in] transaction
@@ -470,6 +493,9 @@ public:
seastore_off_t length, ///< [in] length
bool delayed = false ///< [in] whether the paddr allocation of extent is delayed
) {
+ LOG_PREFIX(Cache::alloc_new_extent);
+ SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}",
+ t, T::TYPE, length, delayed);
auto ret = CachedExtent::make_cached_extent_ref<T>(
alloc_cache_buf(length));
t.add_fresh_extent(ret, delayed);
@@ -477,20 +503,23 @@ public:
return ret;
}
- void clear_lru() {
- lru.clear();
- }
-
void mark_delayed_extent_inline(
Transaction& t,
- LogicalCachedExtentRef& ref) {
+ LogicalCachedExtentRef& ref
+ ) {
+ LOG_PREFIX(Cache::mark_delayed_extent_inline);
+ SUBDEBUGT(seastore_cache, "-- {}", t, *ref);
t.mark_delayed_extent_inline(ref);
}
void mark_delayed_extent_ool(
Transaction& t,
LogicalCachedExtentRef& ref,
- paddr_t final_addr) {
+ paddr_t final_addr
+ ) {
+ LOG_PREFIX(Cache::mark_delayed_extent_ool);
+ SUBDEBUGT(seastore_cache, "final_addr={} -- {}",
+ t, final_addr, *ref);
t.mark_delayed_extent_ool(ref, final_addr);
}
@@ -598,26 +627,56 @@ public:
Transaction &t,
F &&f)
{
+ LOG_PREFIX(Cache::init_cached_extents);
+ SUBINFOT(seastore_cache,
+ "start with {}({}B) extents, {} dirty, from {}",
+ t,
+ extents.size(),
+ extents.get_bytes(),
+ dirty.size(),
+ get_oldest_dirty_from().value_or(journal_seq_t{}));
+
// journal replay should has been finished at this point,
// Cache::root should have been inserted to the dirty list
assert(root->is_dirty());
- std::vector<CachedExtentRef> dirty;
+ std::vector<CachedExtentRef> _dirty;
for (auto &e : extents) {
- dirty.push_back(CachedExtentRef(&e));
+ _dirty.push_back(CachedExtentRef(&e));
}
return seastar::do_with(
std::forward<F>(f),
- std::move(dirty),
- [&t](auto &f, auto &refs) mutable {
- return trans_intr::do_for_each(
- refs,
- [&t, &f](auto &e) { return f(t, e); });
- }).handle_error_interruptible(
- init_cached_extents_iertr::pass_further{},
- crimson::ct_error::assert_all{
- "Invalid error in Cache::init_cached_extents"
- }
- );
+ std::move(_dirty),
+ [this, FNAME, &t](auto &f, auto &refs) mutable
+ {
+ return trans_intr::do_for_each(
+ refs,
+ [this, FNAME, &t, &f](auto &e)
+ {
+ SUBTRACET(seastore_cache, "inspecting extent ... -- {}", t, *e);
+ return f(t, e
+ ).si_then([this, FNAME, &t, e](bool is_alive) {
+ if (!is_alive) {
+ SUBDEBUGT(seastore_cache, "extent is not alive, remove -- {}", t, *e);
+ remove_extent(e);
+ } else {
+ SUBDEBUGT(seastore_cache, "extent is alive -- {}", t, *e);
+ }
+ });
+ });
+ }).handle_error_interruptible(
+ init_cached_extents_iertr::pass_further{},
+ crimson::ct_error::assert_all{
+ "Invalid error in Cache::init_cached_extents"
+ }
+ ).si_then([this, FNAME, &t] {
+ SUBINFOT(seastore_cache,
+ "finish with {}({}B) extents, {} dirty, from {}",
+ t,
+ extents.size(),
+ extents.get_bytes(),
+ dirty.size(),
+ get_oldest_dirty_from().value_or(journal_seq_t{}));
+ });
}
/**
@@ -743,6 +802,10 @@ private:
public:
LRU(size_t capacity) : capacity(capacity) {}
+ size_t get_capacity() const {
+ return capacity;
+ }
+
size_t get_current_contents_bytes() const {
return contents;
}
@@ -797,49 +860,29 @@ private:
uint64_t hit = 0;
};
- /**
- * effort_t
- *
- * Count the number of extents involved in the effort and the total bytes of
- * them.
- *
- * Each effort_t represents the effort of a set of extents involved in the
- * transaction, classified by read, mutate, retire and allocate behaviors,
- * see XXX_trans_efforts_t.
- */
- struct effort_t {
- uint64_t extents = 0;
- uint64_t bytes = 0;
-
- void increment(uint64_t extent_len) {
- ++extents;
- bytes += extent_len;
- }
- };
-
template <typename CounterT>
using counter_by_extent_t = std::array<CounterT, EXTENT_TYPES_MAX>;
struct invalid_trans_efforts_t {
- effort_t read;
- effort_t mutate;
+ io_stat_t read;
+ io_stat_t mutate;
uint64_t mutate_delta_bytes = 0;
- effort_t retire;
- effort_t fresh;
- effort_t fresh_ool_written;
+ io_stat_t retire;
+ io_stat_t fresh;
+ io_stat_t fresh_ool_written;
counter_by_extent_t<uint64_t> num_trans_invalidated;
uint64_t num_ool_records = 0;
uint64_t ool_record_bytes = 0;
};
struct commit_trans_efforts_t {
- counter_by_extent_t<effort_t> read_by_ext;
- counter_by_extent_t<effort_t> mutate_by_ext;
+ counter_by_extent_t<io_stat_t> read_by_ext;
+ counter_by_extent_t<io_stat_t> mutate_by_ext;
counter_by_extent_t<uint64_t> delta_bytes_by_ext;
- counter_by_extent_t<effort_t> retire_by_ext;
- counter_by_extent_t<effort_t> fresh_invalid_by_ext; // inline but is already invalid (retired)
- counter_by_extent_t<effort_t> fresh_inline_by_ext;
- counter_by_extent_t<effort_t> fresh_ool_by_ext;
+ counter_by_extent_t<io_stat_t> retire_by_ext;
+ counter_by_extent_t<io_stat_t> fresh_invalid_by_ext; // inline but is already invalid (retired)
+ counter_by_extent_t<io_stat_t> fresh_inline_by_ext;
+ counter_by_extent_t<io_stat_t> fresh_ool_by_ext;
uint64_t num_trans = 0; // the number of inline records
uint64_t num_ool_records = 0;
uint64_t ool_record_padding_bytes = 0;
@@ -849,7 +892,7 @@ private:
};
struct success_read_trans_efforts_t {
- effort_t read;
+ io_stat_t read;
uint64_t num_trans = 0;
};
@@ -995,12 +1038,14 @@ private:
extent->get_bptr()
).safe_then(
[extent=std::move(extent)]() mutable {
+ LOG_PREFIX(Cache::read_extent);
extent->state = CachedExtent::extent_state_t::CLEAN;
/* TODO: crc should be checked against LBA manager */
extent->last_committed_crc = extent->get_crc32c();
extent->on_clean_read();
extent->complete_io();
+ SUBDEBUG(seastore_cache, "read extent done -- {}", *extent);
return get_extent_ertr::make_ready_future<TCachedExtentRef<T>>(
std::move(extent));
},
diff --git a/src/crimson/os/seastore/journal.cc b/src/crimson/os/seastore/journal.cc
index 173a6c8124c..d7854ee6cf6 100644
--- a/src/crimson/os/seastore/journal.cc
+++ b/src/crimson/os/seastore/journal.cc
@@ -648,9 +648,13 @@ Journal::RecordSubmitter::submit(
seastar::future<> Journal::RecordSubmitter::flush(OrderingHandle &handle)
{
+ LOG_PREFIX(RecordSubmitter::flush);
+ DEBUG("H{} flush", (void*)&handle);
return handle.enter(write_pipeline->device_submission
).then([this, &handle] {
return handle.enter(write_pipeline->finalize);
+ }).then([FNAME, &handle] {
+ DEBUG("H{} flush done", (void*)&handle);
});
}
diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h
index c98302371ed..69aa0954a9c 100644
--- a/src/crimson/os/seastore/lba_manager.h
+++ b/src/crimson/os/seastore/lba_manager.h
@@ -118,9 +118,11 @@ public:
* Implementation must initialize the LBAPin on any
* LogicalCachedExtent's and may also read in any dependent
* structures, etc.
+ *
+ * @return returns whether the extent is alive
*/
using init_cached_extent_iertr = base_iertr;
- using init_cached_extent_ret = init_cached_extent_iertr::future<>;
+ using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
virtual init_cached_extent_ret init_cached_extent(
Transaction &t,
CachedExtentRef e) = 0;
diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
index f15582733f7..c9b9ee865b3 100644
--- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
+++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
@@ -266,14 +266,17 @@ BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
{
LOG_PREFIX(BtreeLBAManager::init_cached_extent);
DEBUGT("extent {}", t, *e);
- auto c = get_context(t);
- return with_btree(
- c,
- [c, e](auto &btree) {
- return btree.init_cached_extent(
- c, e
- ).si_then([](auto) {});
+ return seastar::do_with(bool(), [this, e, &t](bool& ret) {
+ auto c = get_context(t);
+ return with_btree(c, [c, e, &ret](auto &btree) {
+ return btree.init_cached_extent(c, e
+ ).si_then([&ret](bool is_alive) {
+ ret = is_alive;
+ });
+ }).si_then([&ret] {
+ return ret;
});
+ });
}
BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings(
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc
index 84358df08da..9bea6b596ab 100644
--- a/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc
+++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc
@@ -286,12 +286,11 @@ LBABtree::init_cached_extent_ret LBABtree::init_cached_extent(
c.pins->add_pin(
static_cast<BtreeLBAPin&>(logn->get_pin()).pin);
}
- DEBUGT("logical extent {} live, initialized", c.trans, *logn);
- return e;
+ DEBUGT("logical extent {} live", c.trans, *logn);
+ return true;
} else {
- DEBUGT("logical extent {} not live, dropping", c.trans, *logn);
- c.cache.drop_from_cache(logn);
- return CachedExtentRef();
+ DEBUGT("logical extent {} not live", c.trans, *logn);
+ return false;
}
});
} else if (e->get_type() == extent_types_t::LADDR_INTERNAL) {
@@ -304,11 +303,10 @@ LBABtree::init_cached_extent_ret LBABtree::init_cached_extent(
if (cand_depth <= iter.get_depth() &&
&*iter.get_internal(cand_depth).node == &*eint) {
DEBUGT("extent {} is live", c.trans, *eint);
- return e;
+ return true;
} else {
DEBUGT("extent {} is not live", c.trans, *eint);
- c.cache.drop_from_cache(eint);
- return CachedExtentRef();
+ return false;
}
});
} else if (e->get_type() == extent_types_t::LADDR_LEAF) {
@@ -319,11 +317,10 @@ LBABtree::init_cached_extent_ret LBABtree::init_cached_extent(
// Note, this check is valid even if iter.is_end()
if (iter.leaf.node == &*eleaf) {
DEBUGT("extent {} is live", c.trans, *eleaf);
- return e;
+ return true;
} else {
DEBUGT("extent {} is not live", c.trans, *eleaf);
- c.cache.drop_from_cache(eleaf);
- return CachedExtentRef();
+ return false;
}
});
} else {
@@ -334,7 +331,7 @@ LBABtree::init_cached_extent_ret LBABtree::init_cached_extent(
e->get_type());
return init_cached_extent_ret(
interruptible::ready_future_marker{},
- e);
+ true);
}
}
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree.h
index 5caf0940474..163f7b11d9c 100644
--- a/src/crimson/os/seastore/lba_manager/btree/lba_btree.h
+++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree.h
@@ -378,10 +378,10 @@ public:
* Checks whether e is live (reachable from lba tree) and drops or initializes
* accordingly.
*
- * Returns e if live and a null CachedExtentRef otherwise.
+ * Returns if e is live.
*/
using init_cached_extent_iertr = base_iertr;
- using init_cached_extent_ret = init_cached_extent_iertr::future<CachedExtentRef>;
+ using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
init_cached_extent_ret init_cached_extent(op_context_t c, CachedExtentRef e);
/// get_leaf_if_live: get leaf node at laddr/addr if still live
diff --git a/src/crimson/os/seastore/logging.h b/src/crimson/os/seastore/logging.h
index 7153850eca3..3f1f93439ef 100644
--- a/src/crimson/os/seastore/logging.h
+++ b/src/crimson/os/seastore/logging.h
@@ -17,11 +17,11 @@
#define LOG(level_, MSG, ...) \
LOCAL_LOGGER.log(level_, "{}: " MSG, FNAME , ##__VA_ARGS__)
#define LOGT(level_, MSG, t, ...) \
- LOCAL_LOGGER.log(level_, "{}({}): " MSG, FNAME, (void*)&t , ##__VA_ARGS__)
+ LOCAL_LOGGER.log(level_, "{} {}: " MSG, (void*)&t, FNAME , ##__VA_ARGS__)
#define SUBLOG(subname_, level_, MSG, ...) \
LOGGER(subname_).log(level_, "{}: " MSG, FNAME , ##__VA_ARGS__)
#define SUBLOGT(subname_, level_, MSG, t, ...) \
- LOGGER(subname_).log(level_, "{}({}): " MSG, FNAME, (void*)&t , ##__VA_ARGS__)
+ LOGGER(subname_).log(level_, "{} {}: " MSG, (void*)&t, FNAME , ##__VA_ARGS__)
#else
@@ -35,11 +35,11 @@ void _LOG(seastar::logger& logger, std::string_view info) {
#define LOG(level_, MSG, ...) \
_LOG<level_>(LOCAL_LOGGER, "{}: " MSG ## _format(FNAME , ##__VA_ARGS__))
#define LOGT(level_, MSG, t_, ...) \
- _LOG<level_>(LOCAL_LOGGER, "{}({}): " MSG ## _format(FNAME, (void*)&t_ , ##__VA_ARGS__))
+ _LOG<level_>(LOCAL_LOGGER, "{} {}: " MSG ## _format((void*)&t_, FNAME , ##__VA_ARGS__))
#define SUBLOG(subname_, level_, MSG, ...) \
_LOG<level_>(LOGGER(subname_), "{}: " MSG ## _format(FNAME , ##__VA_ARGS__))
#define SUBLOGT(subname_, level_, MSG, t_, ...) \
- _LOG<level_>(LOGGER(subname_), "{}({}): " MSG ## _format(FNAME, (void*)&t_ , ##__VA_ARGS__))
+ _LOG<level_>(LOGGER(subname_), "{} {}: " MSG ## _format((void*)&t_, FNAME , ##__VA_ARGS__))
#endif
diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h
index 92971b44df6..8e4cb8d17f5 100644
--- a/src/crimson/os/seastore/transaction.h
+++ b/src/crimson/os/seastore/transaction.h
@@ -19,10 +19,38 @@ namespace crimson::os::seastore {
class SeaStore;
class Transaction;
+struct io_stat_t {
+ uint64_t num = 0;
+ uint64_t bytes = 0;
+
+ bool is_clear() const {
+ return (num == 0 && bytes == 0);
+ }
+
+ void increment(uint64_t _bytes) {
+ ++num;
+ bytes += _bytes;
+ }
+
+ void increment_stat(const io_stat_t& stat) {
+ num += stat.num;
+ bytes += stat.bytes;
+ }
+};
+inline std::ostream& operator<<(std::ostream& out, const io_stat_t& stat) {
+ return out << stat.num << "(" << stat.bytes << "B)";
+}
+
/**
* Transaction
*
* Representation of in-progress mutation. Used exclusively through Cache methods.
+ *
+ * Transaction log levels:
+ * seastore_t
+ * - DEBUG: transaction create, conflict, commit events
+ * - TRACE: DEBUG details
+ * - seastore_cache logs
*/
class Transaction {
public:
@@ -41,7 +69,8 @@ public:
iter != write_set.end()) {
if (out)
*out = CachedExtentRef(&*iter);
- SUBTRACET(seastore_tm, "Found offset {} in write_set: {}", *this, addr, *iter);
+ SUBTRACET(seastore_cache, "{} is present in write_set -- {}",
+ *this, addr, *iter);
return get_extent_ret::PRESENT;
} else if (
auto iter = read_set.find(addr);
@@ -51,7 +80,8 @@ public:
assert(iter->ref->get_type() != extent_types_t::RETIRED_PLACEHOLDER);
if (out)
*out = iter->ref;
- SUBTRACET(seastore_tm, "Found offset {} in read_set: {}", *this, addr, *(iter->ref));
+ SUBTRACET(seastore_cache, "{} is present in read_set -- {}",
+ *this, addr, *(iter->ref));
return get_extent_ret::PRESENT;
} else {
return get_extent_ret::ABSENT;
@@ -88,7 +118,6 @@ public:
void add_fresh_extent(
CachedExtentRef ref,
bool delayed = false) {
- LOG_PREFIX(Transaction::add_fresh_extent);
ceph_assert(!is_weak());
if (delayed) {
assert(ref->is_logical());
@@ -100,41 +129,31 @@ public:
offset += ref->get_length();
inline_block_list.push_back(ref);
}
- ++fresh_block_stats.num;
- fresh_block_stats.bytes += ref->get_length();
- SUBTRACET(seastore_tm, "adding {} to write_set", *this, *ref);
+ fresh_block_stats.increment(ref->get_length());
write_set.insert(*ref);
}
void mark_delayed_extent_inline(LogicalCachedExtentRef& ref) {
- LOG_PREFIX(Transaction::mark_delayed_extent_inline);
- SUBTRACET(seastore_tm, "removing {} from write_set", *this, *ref);
write_set.erase(*ref);
ref->set_paddr(make_record_relative_paddr(offset));
offset += ref->get_length();
inline_block_list.push_back(ref);
- SUBTRACET(seastore_tm, "adding {} to write_set", *this, *ref);
write_set.insert(*ref);
}
void mark_delayed_extent_ool(LogicalCachedExtentRef& ref, paddr_t final_addr) {
- LOG_PREFIX(Transaction::mark_delayed_extent_ool);
- SUBTRACET(seastore_tm, "removing {} from write_set", *this, *ref);
write_set.erase(*ref);
ref->set_paddr(final_addr);
assert(!ref->get_paddr().is_null());
assert(!ref->is_inline());
ool_block_list.push_back(ref);
- SUBTRACET(seastore_tm, "adding {} to write_set", *this, *ref);
write_set.insert(*ref);
}
void add_mutated_extent(CachedExtentRef ref) {
- LOG_PREFIX(Transaction::add_mutated_extent);
ceph_assert(!is_weak());
assert(read_set.count(ref->prior_instance->get_paddr()));
mutated_block_list.push_back(ref);
- SUBTRACET(seastore_tm, "adding {} to write_set", *this, *ref);
write_set.insert(*ref);
}
@@ -188,14 +207,6 @@ public:
std::for_each(inline_block_list.begin(), inline_block_list.end(), f);
}
- struct io_stat_t {
- uint64_t num = 0;
- uint64_t bytes = 0;
-
- bool is_clear() const {
- return (num == 0 && bytes == 0);
- }
- };
const io_stat_t& get_fresh_block_stats() const {
return fresh_block_stats;
}
diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc
index 3dfb8977af6..15c6672b5b4 100644
--- a/src/crimson/os/seastore/transaction_manager.cc
+++ b/src/crimson/os/seastore/transaction_manager.cc
@@ -9,6 +9,12 @@
#include "crimson/os/seastore/segment_manager.h"
#include "crimson/os/seastore/journal.h"
+/*
+ * TransactionManager logs
+ *
+ * levels:
+ * - seastore_t logs
+ */
SET_SUBSYS(seastore_tm);
namespace crimson::os::seastore {
@@ -239,17 +245,18 @@ TransactionManager::submit_transaction(
Transaction &t)
{
LOG_PREFIX(TransactionManager::submit_transaction);
+ SUBTRACET(seastore_t, "start", t);
return trans_intr::make_interruptible(
t.get_handle().enter(write_pipeline.reserve_projected_usage)
).then_interruptible([this, FNAME, &t] {
size_t projected_usage = t.get_allocation_size();
- DEBUGT("waiting for projected_usage: {}", t, projected_usage);
+ SUBTRACET(seastore_t, "waiting for projected_usage: {}", t, projected_usage);
return trans_intr::make_interruptible(
segment_cleaner->reserve_projected_usage(projected_usage)
).then_interruptible([this, &t] {
return submit_transaction_direct(t);
}).finally([this, FNAME, projected_usage, &t] {
- DEBUGT("releasing projected_usage: {}", t, projected_usage);
+ SUBTRACET(seastore_t, "releasing projected_usage: {}", t, projected_usage);
segment_cleaner->release_projected_usage(projected_usage);
});
});
@@ -260,18 +267,18 @@ TransactionManager::submit_transaction_direct(
Transaction &tref)
{
LOG_PREFIX(TransactionManager::submit_transaction_direct);
- DEBUGT("about to alloc delayed extents", tref);
-
+ SUBTRACET(seastore_t, "start", tref);
return trans_intr::make_interruptible(
tref.get_handle().enter(write_pipeline.ool_writes)
- ).then_interruptible([this, &tref] {
+ ).then_interruptible([this, FNAME, &tref] {
+ SUBTRACET(seastore_t, "process delayed and out-of-line extents", tref);
return epm->delayed_alloc_or_ool_write(tref
).handle_error_interruptible(
crimson::ct_error::input_output_error::pass_further(),
crimson::ct_error::assert_all("invalid error")
);
}).si_then([this, FNAME, &tref] {
- DEBUGT("about to prepare", tref);
+ SUBTRACET(seastore_t, "about to prepare", tref);
return tref.get_handle().enter(write_pipeline.prepare);
}).si_then([this, FNAME, &tref]() mutable
-> submit_transaction_iertr::future<> {
@@ -279,17 +286,12 @@ TransactionManager::submit_transaction_direct(
tref.get_handle().maybe_release_collection_lock();
- DEBUGT("about to submit to journal", tref);
-
+ SUBTRACET(seastore_t, "about to submit to journal", tref);
return journal->submit_record(std::move(record), tref.get_handle()
).safe_then([this, FNAME, &tref](auto submit_result) mutable {
+ SUBDEBUGT(seastore_t, "committed with {}", tref, submit_result);
auto start_seq = submit_result.write_result.start_seq;
auto end_seq = submit_result.write_result.get_end_seq();
- DEBUGT("journal commit to record_block_base={}, start_seq={}, end_seq={}",
- tref,
- submit_result.record_block_base,
- start_seq,
- end_seq);
segment_cleaner->set_journal_head(end_seq);
cache->complete_commit(
tref,
@@ -301,6 +303,7 @@ TransactionManager::submit_transaction_direct(
cache->get_oldest_dirty_from().value_or(start_seq));
auto to_release = tref.get_segment_to_release();
if (to_release != NULL_SEG_ID) {
+ SUBDEBUGT(seastore_t, "releasing segment {}", tref, to_release);
return segment_manager.release(to_release
).safe_then([this, to_release] {
segment_cleaner->mark_segment_released(to_release);
@@ -308,7 +311,8 @@ TransactionManager::submit_transaction_direct(
} else {
return SegmentManager::release_ertr::now();
}
- }).safe_then([&tref] {
+ }).safe_then([FNAME, &tref] {
+ SUBTRACET(seastore_t, "completed", tref);
return tref.get_handle().complete();
}).handle_error(
submit_transaction_iertr::pass_further{},