// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #include "crimson/common/log.h" #include "crimson/os/seastore/logging.h" #include "crimson/os/seastore/lba_manager/btree/lba_btree.h" SET_SUBSYS(seastore_lba); namespace crimson::os::seastore::lba_manager::btree { LBABtree::mkfs_ret LBABtree::mkfs(op_context_t c) { auto root_leaf = c.cache.alloc_new_extent( c.trans, LBA_BLOCK_SIZE); root_leaf->set_size(0); lba_node_meta_t meta{0, L_ADDR_MAX, 1}; root_leaf->set_meta(meta); root_leaf->pin.set_range(meta); c.trans.get_lba_tree_stats().depth = 1u; return lba_root_t{root_leaf->get_paddr(), 1u}; } LBABtree::iterator::handle_boundary_ret LBABtree::iterator::handle_boundary( op_context_t c, mapped_space_visitor_t *visitor) { assert(at_boundary()); depth_t depth_with_space = 2; for (; depth_with_space <= get_depth(); ++depth_with_space) { if ((get_internal(depth_with_space).pos + 1) < get_internal(depth_with_space).node->get_size()) { break; } } if (depth_with_space <= get_depth()) { return seastar::do_with( [](const LBAInternalNode &internal) { return internal.begin(); }, [](const LBALeafNode &leaf) { return leaf.begin(); }, [this, c, depth_with_space, visitor](auto &li, auto &ll) { for (depth_t depth = 2; depth < depth_with_space; ++depth) { get_internal(depth).reset(); } leaf.reset(); get_internal(depth_with_space).pos++; // note, cannot result in at_boundary() by construction return lookup_depth_range( c, *this, depth_with_space - 1, 0, li, ll, visitor ); }); } else { // end return seastar::now(); } } LBABtree::iterator_fut LBABtree::iterator::next( op_context_t c, mapped_space_visitor_t *visitor) const { assert_valid(); assert(!is_end()); auto ret = *this; ret.leaf.pos++; if (ret.at_boundary()) { return seastar::do_with( ret, [c, visitor](auto &ret) mutable { return ret.handle_boundary( c, visitor ).si_then([&ret] { return std::move(ret); }); }); } else { return iterator_fut( interruptible::ready_future_marker{}, ret); } } LBABtree::iterator_fut LBABtree::iterator::prev(op_context_t c) const { assert_valid(); assert(!is_begin()); auto ret = *this; if (ret.leaf.pos > 0) { ret.leaf.pos--; return iterator_fut( interruptible::ready_future_marker{}, ret); } depth_t depth_with_space = 2; for (; depth_with_space <= get_depth(); ++depth_with_space) { if (ret.get_internal(depth_with_space).pos > 0) { break; } } assert(depth_with_space <= ret.get_depth()); // must not be begin() return seastar::do_with( std::move(ret), [](const LBAInternalNode &internal) { return --internal.end(); }, [](const LBALeafNode &leaf) { return --leaf.end(); }, [c, depth_with_space](auto &ret, auto &li, auto &ll) { for (depth_t depth = 2; depth < depth_with_space; ++depth) { ret.get_internal(depth).reset(); } ret.leaf.reset(); ret.get_internal(depth_with_space).pos--; // note, cannot result in at_boundary() by construction return lookup_depth_range( c, ret, depth_with_space - 1, 0, li, ll, nullptr ).si_then([&ret] { assert(!ret.at_boundary()); return std::move(ret); }); }); } LBABtree::iterator_fut LBABtree::lower_bound( op_context_t c, laddr_t addr, mapped_space_visitor_t *visitor) const { LOG_PREFIX(LBATree::lower_bound); return lookup( c, [addr](const LBAInternalNode &internal) { assert(internal.get_size() > 0); auto iter = internal.upper_bound(addr); assert(iter != internal.begin()); --iter; return iter; }, [FNAME, c, addr](const LBALeafNode &leaf) { auto ret = leaf.lower_bound(addr); DEBUGT( "leaf addr {}, got ret offset {}, size {}, end {}", c.trans, addr, ret.get_offset(), leaf.get_size(), ret == leaf.end()); return ret; }, visitor ).si_then([FNAME, c](auto &&ret) { DEBUGT( "ret.leaf.pos {}", c.trans, ret.leaf.pos); ret.assert_valid(); return std::move(ret); }); } LBABtree::insert_ret LBABtree::insert( op_context_t c, iterator iter, laddr_t laddr, lba_map_val_t val) { LOG_PREFIX(LBATree::insert); DEBUGT( "inserting laddr {} at iter {}", c.trans, laddr, iter.is_end() ? L_ADDR_MAX : iter.get_key()); return seastar::do_with( iter, [this, c, laddr, val](auto &ret) { return find_insertion( c, laddr, ret ).si_then([this, c, laddr, val, &ret] { if (!ret.at_boundary() && ret.get_key() == laddr) { return insert_ret( interruptible::ready_future_marker{}, std::make_pair(ret, false)); } else { ++(c.trans.get_lba_tree_stats().num_inserts); return handle_split( c, ret ).si_then([c, laddr, val, &ret] { if (!ret.leaf.node->is_pending()) { CachedExtentRef mut = c.cache.duplicate_for_write( c.trans, ret.leaf.node ); ret.leaf.node = mut->cast(); } auto iter = LBALeafNode::const_iterator( ret.leaf.node.get(), ret.leaf.pos); assert(iter == ret.leaf.node->lower_bound(laddr)); assert(iter == ret.leaf.node->end() || iter->get_key() > laddr); assert(laddr >= ret.leaf.node->get_meta().begin && laddr < ret.leaf.node->get_meta().end); ret.leaf.node->insert(iter, laddr, val); return insert_ret( interruptible::ready_future_marker{}, std::make_pair(ret, true)); }); } }); }); } LBABtree::update_ret LBABtree::update( op_context_t c, iterator iter, lba_map_val_t val) { LOG_PREFIX(LBATree::update); DEBUGT( "update element at {}", c.trans, iter.is_end() ? L_ADDR_MAX : iter.get_key()); if (!iter.leaf.node->is_pending()) { CachedExtentRef mut = c.cache.duplicate_for_write( c.trans, iter.leaf.node ); iter.leaf.node = mut->cast(); } iter.leaf.node->update( iter.leaf.node->iter_idx(iter.leaf.pos), val); return update_ret( interruptible::ready_future_marker{}, iter); } LBABtree::remove_ret LBABtree::remove( op_context_t c, iterator iter) { LOG_PREFIX(LBATree::remove); DEBUGT( "remove element at {}", c.trans, iter.is_end() ? L_ADDR_MAX : iter.get_key()); assert(!iter.is_end()); ++(c.trans.get_lba_tree_stats().num_erases); return seastar::do_with( iter, [this, c](auto &ret) { if (!ret.leaf.node->is_pending()) { CachedExtentRef mut = c.cache.duplicate_for_write( c.trans, ret.leaf.node ); ret.leaf.node = mut->cast(); } ret.leaf.node->remove( ret.leaf.node->iter_idx(ret.leaf.pos)); return handle_merge( c, ret ); }); } LBABtree::init_cached_extent_ret LBABtree::init_cached_extent( op_context_t c, CachedExtentRef e) { LOG_PREFIX(LBATree::init_cached_extent); DEBUGT("extent {}", c.trans, *e); if (e->is_logical()) { auto logn = e->cast(); return lower_bound( c, logn->get_laddr() ).si_then([FNAME, e, c, logn](auto iter) { if (!iter.is_end() && iter.get_key() == logn->get_laddr() && iter.get_val().paddr == logn->get_paddr()) { logn->set_pin(iter.get_pin()); ceph_assert(iter.get_val().len == e->get_length()); if (c.pins) { c.pins->add_pin( static_cast(logn->get_pin()).pin); } DEBUGT("logical extent {} live, initialized", c.trans, *logn); return e; } else { DEBUGT("logical extent {} not live, dropping", c.trans, *logn); c.cache.drop_from_cache(logn); return CachedExtentRef(); } }); } else if (e->get_type() == extent_types_t::LADDR_INTERNAL) { auto eint = e->cast(); return lower_bound( c, eint->get_node_meta().begin ).si_then([FNAME, e, c, eint](auto iter) { // Note, this check is valid even if iter.is_end() depth_t cand_depth = eint->get_node_meta().depth; if (cand_depth <= iter.get_depth() && &*iter.get_internal(cand_depth).node == &*eint) { DEBUGT("extent {} is live", c.trans, *eint); return e; } else { DEBUGT("extent {} is not live", c.trans, *eint); c.cache.drop_from_cache(eint); return CachedExtentRef(); } }); } else if (e->get_type() == extent_types_t::LADDR_LEAF) { auto eleaf = e->cast(); return lower_bound( c, eleaf->get_node_meta().begin ).si_then([FNAME, c, e, eleaf](auto iter) { // Note, this check is valid even if iter.is_end() if (iter.leaf.node == &*eleaf) { DEBUGT("extent {} is live", c.trans, *eleaf); return e; } else { DEBUGT("extent {} is not live", c.trans, *eleaf); c.cache.drop_from_cache(eleaf); return CachedExtentRef(); } }); } else { DEBUGT( "found other extent {} type {}", c.trans, *e, e->get_type()); return init_cached_extent_ret( interruptible::ready_future_marker{}, e); } } LBABtree::get_internal_if_live_ret LBABtree::get_internal_if_live( op_context_t c, paddr_t addr, laddr_t laddr, seastore_off_t len) { LOG_PREFIX(BtreeLBAManager::get_leaf_if_live); return lower_bound( c, laddr ).si_then([FNAME, c, addr, laddr, len](auto iter) { for (depth_t d = 2; d <= iter.get_depth(); ++d) { CachedExtent &node = *iter.get_internal(d).node; auto internal_node = node.cast(); if (internal_node->get_paddr() == addr) { DEBUGT( "extent laddr {} addr {}~{} found: {}", c.trans, laddr, addr, len, *internal_node); assert(internal_node->get_node_meta().begin == laddr); return CachedExtentRef(internal_node); } } DEBUGT( "extent laddr {} addr {}~{} is not live, no matching internal node", c.trans, laddr, addr, len); return CachedExtentRef(); }); } LBABtree::get_leaf_if_live_ret LBABtree::get_leaf_if_live( op_context_t c, paddr_t addr, laddr_t laddr, seastore_off_t len) { LOG_PREFIX(BtreeLBAManager::get_leaf_if_live); return lower_bound( c, laddr ).si_then([FNAME, c, addr, laddr, len](auto iter) { if (iter.leaf.node->get_paddr() == addr) { DEBUGT( "extent laddr {} addr {}~{} found: {}", c.trans, laddr, addr, len, *iter.leaf.node); return CachedExtentRef(iter.leaf.node); } else { DEBUGT( "extent laddr {} addr {}~{} is not live, does not match node {}", c.trans, laddr, addr, len, *iter.leaf.node); return CachedExtentRef(); } }); } LBABtree::rewrite_lba_extent_ret LBABtree::rewrite_lba_extent( op_context_t c, CachedExtentRef e) { LOG_PREFIX(LBABtree::rewrite_lba_extent); assert(e->get_type() == extent_types_t::LADDR_INTERNAL || e->get_type() == extent_types_t::LADDR_LEAF); auto do_rewrite = [&](auto &lba_extent) { auto nlba_extent = c.cache.alloc_new_extent< std::remove_reference_t >( c.trans, lba_extent.get_length()); lba_extent.get_bptr().copy_out( 0, lba_extent.get_length(), nlba_extent->get_bptr().c_str()); nlba_extent->pin.set_range(nlba_extent->get_node_meta()); /* This is a bit underhanded. Any relative addrs here must necessarily * be record relative as we are rewriting a dirty extent. Thus, we * are using resolve_relative_addrs with a (likely negative) block * relative offset to correct them to block-relative offsets adjusted * for our new transaction location. * * Upon commit, these now block relative addresses will be interpretted * against the real final address. */ nlba_extent->resolve_relative_addrs( make_record_relative_paddr(0) - nlba_extent->get_paddr()); DEBUGT( "rewriting {} into {}", c.trans, lba_extent, *nlba_extent); return update_internal_mapping( c, nlba_extent->get_node_meta().depth, nlba_extent->get_node_meta().begin, e->get_paddr(), nlba_extent->get_paddr() ).si_then([c, e] { c.cache.retire_extent(c.trans, e); }); }; CachedExtentRef nlba_extent; if (e->get_type() == extent_types_t::LADDR_INTERNAL) { auto lint = e->cast(); return do_rewrite(*lint); } else { assert(e->get_type() == extent_types_t::LADDR_LEAF); auto lleaf = e->cast(); return do_rewrite(*lleaf); } } LBABtree::get_internal_node_ret LBABtree::get_internal_node( op_context_t c, depth_t depth, paddr_t offset, laddr_t begin, laddr_t end) { LOG_PREFIX(LBATree::get_internal_node); DEBUGT( "reading internal at offset {}, depth {}, begin {}, end {}", c.trans, offset, depth, begin, end); assert(depth > 1); auto init_internal = [c, depth, begin, end](LBAInternalNode &node) { assert(!node.is_pending()); assert(!node.pin.is_linked()); node.pin.set_range(lba_node_meta_t{begin, end, depth}); if (c.pins) { c.pins->add_pin(node.pin); } }; return c.cache.get_extent( c.trans, offset, LBA_BLOCK_SIZE, init_internal ).si_then([FNAME, c, offset, init_internal, depth, begin, end]( LBAInternalNodeRef ret) { DEBUGT( "read internal at offset {} {}", c.trans, offset, *ret); // This can only happen during init_cached_extent if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) { assert(ret->is_dirty()); init_internal(*ret); } auto meta = ret->get_meta(); if (ret->get_size()) { ceph_assert(meta.begin <= ret->begin()->get_key()); ceph_assert(meta.end > (ret->end() - 1)->get_key()); } ceph_assert(depth == meta.depth); ceph_assert(begin == meta.begin); ceph_assert(end == meta.end); return get_internal_node_ret( interruptible::ready_future_marker{}, ret); }); } LBABtree::get_leaf_node_ret LBABtree::get_leaf_node( op_context_t c, paddr_t offset, laddr_t begin, laddr_t end) { LOG_PREFIX(LBATree::get_leaf_node); DEBUGT( "reading leaf at offset {}, begin {}, end {}", c.trans, offset, begin, end); auto init_leaf = [c, begin, end](LBALeafNode &node) { assert(!node.is_pending()); assert(!node.pin.is_linked()); node.pin.set_range(lba_node_meta_t{begin, end, 1}); if (c.pins) { c.pins->add_pin(node.pin); } }; return c.cache.get_extent( c.trans, offset, LBA_BLOCK_SIZE, init_leaf ).si_then([FNAME, c, offset, init_leaf, begin, end](LBALeafNodeRef ret) { DEBUGT( "read leaf at offset {} {}", c.trans, offset, *ret); // This can only happen during init_cached_extent if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) { assert(ret->is_dirty()); init_leaf(*ret); } auto meta = ret->get_meta(); if (ret->get_size()) { ceph_assert(meta.begin <= ret->begin()->get_key()); ceph_assert(meta.end > (ret->end() - 1)->get_key()); } ceph_assert(1 == meta.depth); ceph_assert(begin == meta.begin); ceph_assert(end == meta.end); return get_leaf_node_ret( interruptible::ready_future_marker{}, ret); }); } LBABtree::find_insertion_ret LBABtree::find_insertion( op_context_t c, laddr_t laddr, iterator &iter) { assert(iter.is_end() || iter.get_key() >= laddr); if (!iter.is_end() && iter.get_key() == laddr) { return seastar::now(); } else if (iter.leaf.node->get_node_meta().begin <= laddr) { #ifndef NDEBUG auto p = iter; if (p.leaf.pos > 0) { --p.leaf.pos; assert(p.get_key() < laddr); } #endif return seastar::now(); } else { assert(iter.leaf.pos == 0); return iter.prev( c ).si_then([laddr, &iter](auto p) { assert(p.leaf.node->get_node_meta().begin <= laddr); assert(p.get_key() < laddr); // Note, this is specifically allowed to violate the iterator // invariant that pos is a valid index for the node in the event // that the insertion point is at the end of a node. p.leaf.pos++; assert(p.at_boundary()); iter = p; return seastar::now(); }); } } LBABtree::handle_split_ret LBABtree::handle_split( op_context_t c, iterator &iter) { LOG_PREFIX(LBATree::handle_split); depth_t split_from = iter.check_split(); DEBUGT("split_from {}, depth {}", c.trans, split_from, iter.get_depth()); if (split_from == iter.get_depth()) { auto nroot = c.cache.alloc_new_extent( c.trans, LBA_BLOCK_SIZE); lba_node_meta_t meta{0, L_ADDR_MAX, iter.get_depth() + 1}; nroot->set_meta(meta); nroot->pin.set_range(meta); nroot->journal_insert( nroot->begin(), L_ADDR_MIN, root.get_location(), nullptr); iter.internal.push_back({nroot, 0}); root.set_location(nroot->get_paddr()); root.set_depth(iter.get_depth()); c.trans.get_lba_tree_stats().depth = iter.get_depth(); root_dirty = true; } /* pos may be either node_position_t or * node_position_t */ auto split_level = [&](auto &parent_pos, auto &pos) { LOG_PREFIX(LBATree::handle_split); auto [left, right, pivot] = pos.node->make_split_children(c); auto parent_node = parent_pos.node; auto parent_iter = parent_pos.get_iter(); parent_node->update( parent_iter, left->get_paddr()); parent_node->insert( parent_iter + 1, pivot, right->get_paddr()); DEBUGT("splitted {} into left: {}, right: {}", c.trans, *pos.node, *left, *right); c.cache.retire_extent(c.trans, pos.node); return std::make_pair(left, right); }; for (; split_from > 0; --split_from) { auto &parent_pos = iter.get_internal(split_from + 1); if (!parent_pos.node->is_pending()) { parent_pos.node = c.cache.duplicate_for_write( c.trans, parent_pos.node )->cast(); } if (split_from > 1) { auto &pos = iter.get_internal(split_from); DEBUGT("splitting internal {} at depth {}, parent: {} at pos: {}", c.trans, *pos.node, split_from, *parent_pos.node, parent_pos.pos); auto [left, right] = split_level(parent_pos, pos); if (pos.pos < left->get_size()) { pos.node = left; } else { pos.node = right; pos.pos -= left->get_size(); parent_pos.pos += 1; } } else { auto &pos = iter.leaf; DEBUGT("splitting leaf {}, parent: {} at pos: {}", c.trans, *pos.node, *parent_pos.node, parent_pos.pos); auto [left, right] = split_level(parent_pos, pos); /* right->get_node_meta().begin == pivot == right->begin()->get_key() * Thus, if pos.pos == left->get_size(), we want iter to point to * left with pos.pos at the end rather than right with pos.pos = 0 * since the insertion would be to the left of the first element * of right and thus necessarily less than right->get_node_meta().begin. */ if (pos.pos <= left->get_size()) { pos.node = left; } else { pos.node = right; pos.pos -= left->get_size(); parent_pos.pos += 1; } } } return seastar::now(); } template LBABtree::base_iertr::future get_node( op_context_t c, depth_t depth, paddr_t addr, laddr_t begin, laddr_t end); template <> LBABtree::base_iertr::future get_node( op_context_t c, depth_t depth, paddr_t addr, laddr_t begin, laddr_t end) { assert(depth == 1); return LBABtree::get_leaf_node(c, addr, begin, end); } template <> LBABtree::base_iertr::future get_node( op_context_t c, depth_t depth, paddr_t addr, laddr_t begin, laddr_t end) { return LBABtree::get_internal_node(c, depth, addr, begin, end); } template LBABtree::handle_merge_ret merge_level( op_context_t c, depth_t depth, LBABtree::node_position_t &parent_pos, LBABtree::node_position_t &pos) { LOG_PREFIX(LBABtree::merge_level); if (!parent_pos.node->is_pending()) { parent_pos.node = c.cache.duplicate_for_write( c.trans, parent_pos.node )->cast(); } auto iter = parent_pos.get_iter(); assert(iter.get_offset() < parent_pos.node->get_size()); bool donor_is_left = ((iter.get_offset() + 1) == parent_pos.node->get_size()); auto donor_iter = donor_is_left ? (iter - 1) : (iter + 1); auto next_iter = donor_iter + 1; auto begin = donor_iter->get_key(); auto end = next_iter == parent_pos.node->end() ? parent_pos.node->get_node_meta().end : next_iter->get_key(); DEBUGT("parent: {}, node: {}", c.trans, *parent_pos.node, *pos.node); return get_node( c, depth, donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr()), begin, end ).si_then([c, iter, donor_iter, donor_is_left, &parent_pos, &pos]( typename NodeType::Ref donor) { LOG_PREFIX(LBABtree::merge_level); auto [l, r] = donor_is_left ? std::make_pair(donor, pos.node) : std::make_pair(pos.node, donor); auto [liter, riter] = donor_is_left ? std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter); if (donor->at_min_capacity()) { auto replacement = l->make_full_merge(c, r); parent_pos.node->update( liter, replacement->get_paddr()); parent_pos.node->remove(riter); pos.node = replacement; if (donor_is_left) { pos.pos += r->get_size(); parent_pos.pos--; } DEBUGT("l: {}, r: {}, replacement: {}", c.trans, *l, *r, *replacement); c.cache.retire_extent(c.trans, l); c.cache.retire_extent(c.trans, r); } else { LOG_PREFIX(LBABtree::merge_level); auto [replacement_l, replacement_r, pivot] = l->make_balanced( c, r, !donor_is_left); parent_pos.node->update( liter, replacement_l->get_paddr()); parent_pos.node->replace( riter, pivot, replacement_r->get_paddr()); if (donor_is_left) { assert(parent_pos.pos > 0); parent_pos.pos--; } auto orig_position = donor_is_left ? l->get_size() + pos.pos : pos.pos; if (orig_position < replacement_l->get_size()) { pos.node = replacement_l; pos.pos = orig_position; } else { parent_pos.pos++; pos.node = replacement_r; pos.pos = orig_position - replacement_l->get_size(); } DEBUGT("l: {}, r: {}, replacement_l: {}, replacement_r: {}", c.trans, *l, *r, *replacement_l, *replacement_r); c.cache.retire_extent(c.trans, l); c.cache.retire_extent(c.trans, r); } return seastar::now(); }); } LBABtree::handle_merge_ret LBABtree::handle_merge( op_context_t c, iterator &iter) { LOG_PREFIX(LBATree::handle_merge); if (iter.get_depth() == 1 || !iter.leaf.node->below_min_capacity()) { DEBUGT( "no need to merge leaf, leaf size {}, depth {}", c.trans, iter.leaf.node->get_size(), iter.get_depth()); return seastar::now(); } return seastar::do_with( depth_t{1}, [FNAME, this, c, &iter](auto &to_merge) { return trans_intr::repeat( [FNAME, this, c, &iter, &to_merge] { DEBUGT( "merging depth {}", c.trans, to_merge); auto &parent_pos = iter.get_internal(to_merge + 1); auto merge_fut = handle_merge_iertr::now(); if (to_merge > 1) { auto &pos = iter.get_internal(to_merge); merge_fut = merge_level(c, to_merge, parent_pos, pos); } else { auto &pos = iter.leaf; merge_fut = merge_level(c, to_merge, parent_pos, pos); } return merge_fut.si_then([FNAME, this, c, &iter, &to_merge] { ++to_merge; auto &pos = iter.get_internal(to_merge); if (to_merge == iter.get_depth()) { if (pos.node->get_size() == 1) { DEBUGT("collapsing root", c.trans); c.cache.retire_extent(c.trans, pos.node); assert(pos.pos == 0); auto node_iter = pos.get_iter(); root.set_location( node_iter->get_val().maybe_relative_to(pos.node->get_paddr())); iter.internal.pop_back(); root.set_depth(iter.get_depth()); c.trans.get_lba_tree_stats().depth = iter.get_depth(); root_dirty = true; } else { DEBUGT("no need to collapse root", c.trans); } return seastar::stop_iteration::yes; } else if (pos.node->below_min_capacity()) { DEBUGT( "continuing, next node {} depth {} at min", c.trans, *pos.node, to_merge); return seastar::stop_iteration::no; } else { DEBUGT( "complete, next node {} depth {} not min", c.trans, *pos.node, to_merge); return seastar::stop_iteration::yes; } }); }); }); } LBABtree::update_internal_mapping_ret LBABtree::update_internal_mapping( op_context_t c, depth_t depth, laddr_t laddr, paddr_t old_addr, paddr_t new_addr) { LOG_PREFIX(LBATree::update_internal_mapping); DEBUGT( "updating laddr {} at depth {} from {} to {}", c.trans, laddr, depth, old_addr, new_addr); return lower_bound( c, laddr ).si_then([=](auto iter) { assert(iter.get_depth() >= depth); if (depth == iter.get_depth()) { DEBUGT("update at root", c.trans); if (laddr != 0) { ERRORT( "updating root laddr {} at depth {} from {} to {}," "laddr is not 0", c.trans, laddr, depth, old_addr, new_addr, root.get_location()); ceph_assert(0 == "impossible"); } if (root.get_location() != old_addr) { ERRORT( "updating root laddr {} at depth {} from {} to {}," "root addr {} does not match", c.trans, laddr, depth, old_addr, new_addr, root.get_location()); ceph_assert(0 == "impossible"); } root.set_location(new_addr); root_dirty = true; } else { auto &parent = iter.get_internal(depth + 1); assert(parent.node); assert(parent.pos < parent.node->get_size()); auto piter = parent.node->iter_idx(parent.pos); if (piter->get_key() != laddr) { ERRORT( "updating laddr {} at depth {} from {} to {}," "node {} pos {} val pivot addr {} does not match", c.trans, laddr, depth, old_addr, new_addr, *(parent.node), parent.pos, piter->get_key()); ceph_assert(0 == "impossible"); } if (piter->get_val() != old_addr) { ERRORT( "updating laddr {} at depth {} from {} to {}," "node {} pos {} val addr {} does not match", c.trans, laddr, depth, old_addr, new_addr, *(parent.node), parent.pos, piter->get_val()); ceph_assert(0 == "impossible"); } CachedExtentRef mut = c.cache.duplicate_for_write( c.trans, parent.node ); LBAInternalNodeRef mparent = mut->cast(); mparent->update(piter, new_addr); /* Note, iter is now invalid as we didn't udpate either the parent * node reference to the new mutable instance nor did we update the * child pointer to the new node. Not a problem as we'll now just * destruct it. */ } return seastar::now(); }); } }