From 645c3122b2a95fc575f1ad4baa0d77d0d99d5527 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 18 Oct 2018 10:17:42 -0400 Subject: osd/osd_types: include PG log return codes in object copy data If the base tier records an error against an operation, the cache tier currently might incorrectly respond with a success return code. Signed-off-by: Jason Dillaman --- src/osd/osd_types.cc | 13 +++++++++++-- src/osd/osd_types.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 79b0ab8482b..dc02c3ff11b 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -4495,7 +4495,7 @@ void object_copy_cursor_t::generate_test_instances(list& void object_copy_data_t::encode(bufferlist& bl, uint64_t features) const { - ENCODE_START(7, 5, bl); + ENCODE_START(8, 5, bl); encode(size, bl); encode(mtime, bl); encode(attrs, bl); @@ -4511,6 +4511,7 @@ void object_copy_data_t::encode(bufferlist& bl, uint64_t features) const encode(reqids, bl); encode(truncate_seq, bl); encode(truncate_size, bl); + encode(reqid_return_codes, bl); ENCODE_FINISH(bl); } @@ -4574,6 +4575,9 @@ void object_copy_data_t::decode(bufferlist::const_iterator& bl) decode(truncate_seq, bl); decode(truncate_size, bl); } + if (struct_v >= 8) { + decode(reqid_return_codes, bl); + } } DECODE_FINISH(bl); } @@ -4633,12 +4637,17 @@ void object_copy_data_t::dump(Formatter *f) const f->dump_unsigned("snap", *p); f->close_section(); f->open_array_section("reqids"); + uint32_t idx = 0; for (auto p = reqids.begin(); p != reqids.end(); - ++p) { + ++idx, ++p) { f->open_object_section("extra_reqid"); f->dump_stream("reqid") << p->first; f->dump_stream("user_version") << p->second; + auto it = reqid_return_codes.find(idx); + if (it != reqid_return_codes.end()) { + f->dump_int("return_code", it->second); + } f->close_section(); } f->close_section(); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c87e3bccf46..cd92a1c76a2 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -4484,6 +4484,9 @@ struct object_copy_data_t { ///< recent reqids on this object mempool::osd_pglog::vector > reqids; + ///< map reqids by index to error return code (if any) + mempool::osd_pglog::map reqid_return_codes; + uint64_t truncate_seq; uint64_t truncate_size; -- cgit v1.2.1 From 881669f00767198411cf040ead9eb44cca8be84a Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 18 Oct 2018 11:27:33 -0400 Subject: osd/PGLog: optionally record error return codes for extra_reqids When a cache tier promotes an object with one or more error PG log entries, these errors need to be propagated and recorded for dup op detection. Signed-off-by: Jason Dillaman --- src/osd/PGLog.cc | 12 +++++++++++- src/osd/PGLog.h | 9 ++++++++- src/osd/osd_types.cc | 15 ++++++++++++--- src/osd/osd_types.h | 4 ++++ 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 1ed178b3cd3..ab9ead3fb92 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -78,10 +78,20 @@ void PGLog::IndexedLog::trim( } dups.push_back(pg_log_dup_t(e)); index(dups.back()); + uint32_t idx = 0; for (const auto& extra : e.extra_reqids) { + int return_code = e.return_code; + if (return_code >= 0) { + auto it = e.extra_reqid_return_codes.find(idx); + if (it != e.extra_reqid_return_codes.end()) { + return_code = it->second; + } + } + ++idx; + // note: extras have the same version as outer op dups.push_back(pg_log_dup_t(e.version, extra.second, - extra.first, e.return_code)); + extra.first, return_code)); index(dups.back()); } } diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index 3b16e09ef54..c7658ed26d2 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -289,13 +289,20 @@ public: } p = extra_caller_ops.find(r); if (p != extra_caller_ops.end()) { + uint32_t idx = 0; for (auto i = p->second->extra_reqids.begin(); i != p->second->extra_reqids.end(); - ++i) { + ++idx, ++i) { if (i->first == r) { *version = p->second->version; *user_version = i->second; *return_code = p->second->return_code; + if (*return_code >= 0) { + auto it = p->second->extra_reqid_return_codes.find(idx); + if (it != p->second->extra_reqid_return_codes.end()) { + *return_code = it->second; + } + } return true; } } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index dc02c3ff11b..783d753edce 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -4022,7 +4022,7 @@ void pg_log_entry_t::decode_with_checksum(bufferlist::const_iterator& p) void pg_log_entry_t::encode(bufferlist &bl) const { - ENCODE_START(11, 4, bl); + ENCODE_START(12, 4, bl); encode(op, bl); encode(soid, bl); encode(version, bl); @@ -4049,12 +4049,14 @@ void pg_log_entry_t::encode(bufferlist &bl) const encode(extra_reqids, bl); if (op == ERROR) encode(return_code, bl); + if (!extra_reqids.empty()) + encode(extra_reqid_return_codes, bl); ENCODE_FINISH(bl); } void pg_log_entry_t::decode(bufferlist::const_iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(11, 4, 4, bl); + DECODE_START_LEGACY_COMPAT_LEN(12, 4, 4, bl); decode(op, bl); if (struct_v < 2) { sobject_t old_soid; @@ -4108,6 +4110,8 @@ void pg_log_entry_t::decode(bufferlist::const_iterator &bl) decode(extra_reqids, bl); if (struct_v >= 11 && op == ERROR) decode(return_code, bl); + if (struct_v >= 12 && !extra_reqids.empty()) + decode(extra_reqid_return_codes, bl); DECODE_FINISH(bl); } @@ -4119,12 +4123,17 @@ void pg_log_entry_t::dump(Formatter *f) const f->dump_stream("prior_version") << prior_version; f->dump_stream("reqid") << reqid; f->open_array_section("extra_reqids"); + uint32_t idx = 0; for (auto p = extra_reqids.begin(); p != extra_reqids.end(); - ++p) { + ++idx, ++p) { f->open_object_section("extra_reqid"); f->dump_stream("reqid") << p->first; f->dump_stream("user_version") << p->second; + auto it = extra_reqid_return_codes.find(idx); + if (it != extra_reqid_return_codes.end()) { + f->dump_int("return_code", it->second); + } f->close_section(); } f->close_section(); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index cd92a1c76a2..448d811fcb1 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -3485,6 +3485,10 @@ struct pg_log_entry_t { hobject_t soid; osd_reqid_t reqid; // caller+tid to uniquely identify request mempool::osd_pglog::vector > extra_reqids; + + ///< map extra_reqids by index to error return code (if any) + mempool::osd_pglog::map extra_reqid_return_codes; + eversion_t version, prior_version, reverting_to; version_t user_version; // the user version for this entry utime_t mtime; // this is the _user_ mtime, mind you -- cgit v1.2.1 From 45b3cedb487330822b2251e34c74a3558167cdf8 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 18 Oct 2018 13:24:18 -0400 Subject: osd/PrimaryLogPG: propagate error return codes on object copy_get ops Signed-off-by: Jason Dillaman --- src/osd/PGLog.h | 12 ++++++++++-- src/osd/PrimaryLogPG.cc | 14 +++++++++++--- src/osd/PrimaryLogPG.h | 2 ++ src/osdc/Objecter.h | 9 ++++++++- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index c7658ed26d2..532da798824 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -325,7 +325,8 @@ public: /// get a (bounded) list of recent reqids for the given object void get_object_reqids(const hobject_t& oid, unsigned max, - mempool::osd_pglog::vector > *pls) const { + mempool::osd_pglog::vector > *pls, + mempool::osd_pglog::map *return_codes) const { // make sure object is present at least once before we do an // O(n) search. if (!(indexed_data & PGLOG_INDEXED_OBJECTS)) { @@ -333,12 +334,19 @@ public: } if (objects.count(oid) == 0) return; + for (list::const_reverse_iterator i = log.rbegin(); i != log.rend(); ++i) { if (i->soid == oid) { - if (i->reqid_is_indexed()) + if (i->reqid_is_indexed()) { + if (i->op == pg_log_entry_t::ERROR) { + // propagate op errors to the cache tier's PG log + return_codes->emplace(pls->size(), i->return_code); + } pls->push_back(make_pair(i->reqid, i->user_version)); + } + pls->insert(pls->end(), i->extra_reqids.begin(), i->extra_reqids.end()); if (pls->size() >= max) { if (pls->size() > max) { diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index c0ccd802e12..875c7ad4e7a 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -8485,8 +8485,10 @@ void PrimaryLogPG::finish_ctx(OpContext *ctx, int log_op_type) } if (!ctx->extra_reqids.empty()) { - dout(20) << __func__ << " extra_reqids " << ctx->extra_reqids << dendl; + dout(20) << __func__ << " extra_reqids " << ctx->extra_reqids << " " + << ctx->extra_reqid_return_codes << dendl; ctx->log.back().extra_reqids.swap(ctx->extra_reqids); + ctx->log.back().extra_reqid_return_codes.swap(ctx->extra_reqid_return_codes); } // apply new object state. @@ -8784,7 +8786,9 @@ int PrimaryLogPG::do_copy_get(OpContext *ctx, bufferlist::const_iterator& bp, if (cursor.is_complete()) { // include reqids only in the final step. this is a bit fragile // but it works... - pg_log.get_log().get_object_reqids(ctx->obc->obs.oi.soid, 10, &reply_obj.reqids); + pg_log.get_log().get_object_reqids(ctx->obc->obs.oi.soid, 10, + &reply_obj.reqids, + &reply_obj.reqid_return_codes); dout(20) << " got reqids" << dendl; } @@ -8820,7 +8824,8 @@ void PrimaryLogPG::fill_in_copy_get_noent(OpRequestRef& op, hobject_t oid, uint64_t features = m->get_features(); object_copy_data_t reply_obj; - pg_log.get_log().get_object_reqids(oid, 10, &reply_obj.reqids); + pg_log.get_log().get_object_reqids(oid, 10, &reply_obj.reqids, + &reply_obj.reqid_return_codes); dout(20) << __func__ << " got reqids " << reply_obj.reqids << dendl; encode(reply_obj, osd_op.outdata, features); osd_op.rval = -ENOENT; @@ -8922,6 +8927,7 @@ void PrimaryLogPG::_copy_some(ObjectContextRef obc, CopyOpRef cop) &cop->results.source_data_digest, &cop->results.source_omap_digest, &cop->results.reqids, + &cop->results.reqid_return_codes, &cop->results.truncate_seq, &cop->results.truncate_size, &cop->rval); @@ -9464,6 +9470,7 @@ void PrimaryLogPG::finish_copyfrom(CopyFromCallback *cb) obs.oi.truncate_size = cb->results->truncate_size; ctx->extra_reqids = cb->results->reqids; + ctx->extra_reqid_return_codes = cb->results->reqid_return_codes; // cache: clear whiteout? if (obs.oi.is_whiteout()) { @@ -9627,6 +9634,7 @@ void PrimaryLogPG::finish_promote(int r, CopyResults *results, tctx->new_obs.exists = true; tctx->extra_reqids = results->reqids; + tctx->extra_reqid_return_codes = results->reqid_return_codes; if (whiteout) { // create a whiteout diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 3707bb98473..f93aeed5b4c 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -95,6 +95,7 @@ public: uint32_t source_data_digest, source_omap_digest; uint32_t data_digest, omap_digest; mempool::osd_pglog::vector > reqids; // [(reqid, user_version)] + mempool::osd_pglog::map reqid_return_codes; // map reqids by index to error code map attrs; // xattrs uint64_t truncate_seq; uint64_t truncate_size; @@ -576,6 +577,7 @@ public: int num_write; ///< count update ops mempool::osd_pglog::vector > extra_reqids; + mempool::osd_pglog::map extra_reqid_return_codes; hobject_t new_temp_oid, discard_temp_oid; ///< temp objects we should start/stop tracking diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 0dc1eebe80b..99b12346f38 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -737,6 +737,7 @@ struct ObjectOperation { uint32_t *out_data_digest; uint32_t *out_omap_digest; mempool::osd_pglog::vector > *out_reqids; + mempool::osd_pglog::map *out_reqid_return_codes; uint64_t *out_truncate_seq; uint64_t *out_truncate_size; int *prval; @@ -752,6 +753,7 @@ struct ObjectOperation { uint32_t *dd, uint32_t *od, mempool::osd_pglog::vector > *oreqids, + mempool::osd_pglog::map *oreqid_return_codes, uint64_t *otseq, uint64_t *otsize, int *r) @@ -761,6 +763,7 @@ struct ObjectOperation { out_omap_data(o), out_snaps(osnaps), out_snap_seq(osnap_seq), out_flags(flags), out_data_digest(dd), out_omap_digest(od), out_reqids(oreqids), + out_reqid_return_codes(oreqid_return_codes), out_truncate_seq(otseq), out_truncate_size(otsize), prval(r) {} @@ -801,6 +804,8 @@ struct ObjectOperation { *out_omap_digest = copy_reply.omap_digest; if (out_reqids) *out_reqids = copy_reply.reqids; + if (out_reqid_return_codes) + *out_reqid_return_codes = copy_reply.reqid_return_codes; if (out_truncate_seq) *out_truncate_seq = copy_reply.truncate_seq; if (out_truncate_size) @@ -827,6 +832,7 @@ struct ObjectOperation { uint32_t *out_data_digest, uint32_t *out_omap_digest, mempool::osd_pglog::vector > *out_reqids, + mempool::osd_pglog::map *out_reqid_return_codes, uint64_t *truncate_seq, uint64_t *truncate_size, int *prval) { @@ -841,7 +847,8 @@ struct ObjectOperation { out_attrs, out_data, out_omap_header, out_omap_data, out_snaps, out_snap_seq, out_flags, out_data_digest, - out_omap_digest, out_reqids, truncate_seq, + out_omap_digest, out_reqids, + out_reqid_return_codes, truncate_seq, truncate_size, prval); out_bl[p] = &h->bl; out_handler[p] = h; -- cgit v1.2.1 From eed3163f2916446008cfdb1fc119b42d7a2ed943 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 18 Oct 2018 14:04:12 -0400 Subject: osd/PrimaryLogPG: uncommitted dup ops should respond with logged return code Fixes: http://tracker.ceph.com/issues/36408 Signed-off-by: Jason Dillaman --- src/osd/PG.h | 2 +- src/osd/PrimaryLogPG.cc | 41 +++++++++++++++---------------------- src/test/librados/tier.cc | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 26 deletions(-) diff --git a/src/osd/PG.h b/src/osd/PG.h index f695f6c5e9d..5d9b48b8b50 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1283,7 +1283,7 @@ protected: map> callbacks_for_degraded_object; map > > waiting_for_ondisk; + list > > waiting_for_ondisk; void requeue_object_waiters(map>& m); void requeue_op(OpRequestRef op); diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 875c7ad4e7a..8cd65df20f6 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2167,7 +2167,7 @@ void PrimaryLogPG::do_op(OpRequestRef& op) } else { dout(10) << " waiting for " << version << " to commit" << dendl; // always queue ondisk waiters, so that we can requeue if needed - waiting_for_ondisk[version].push_back(make_pair(op, user_version)); + waiting_for_ondisk[version].emplace_back(op, user_version, return_code); op->mark_delayed("waiting for ondisk"); } return; @@ -10371,12 +10371,13 @@ void PrimaryLogPG::eval_repop(RepGather *repop) auto it = waiting_for_ondisk.find(repop->v); if (it != waiting_for_ondisk.end()) { ceph_assert(waiting_for_ondisk.begin()->first == repop->v); - for (list >::iterator i = - it->second.begin(); - i != it->second.end(); - ++i) { - osd->reply_op_error(i->first, repop->r, repop->v, - i->second); + for (auto& i : it->second) { + int return_code = repop->r; + if (return_code >= 0) { + return_code = std::get<2>(i); + } + osd->reply_op_error(std::get<0>(i), return_code, repop->v, + std::get<1>(i)); } waiting_for_ondisk.erase(it); } @@ -11885,15 +11886,11 @@ void PrimaryLogPG::apply_and_flush_repops(bool requeue) } // also requeue any dups, interleaved into position - map > >::iterator p = - waiting_for_ondisk.find(repop->v); + auto p = waiting_for_ondisk.find(repop->v); if (p != waiting_for_ondisk.end()) { dout(10) << " also requeuing ondisk waiters " << p->second << dendl; - for (list >::iterator i = - p->second.begin(); - i != p->second.end(); - ++i) { - rq.push_back(i->first); + for (auto& i : p->second) { + rq.push_back(std::get<0>(i)); } waiting_for_ondisk.erase(p); } @@ -11907,17 +11904,11 @@ void PrimaryLogPG::apply_and_flush_repops(bool requeue) if (requeue) { requeue_ops(rq); if (!waiting_for_ondisk.empty()) { - for (map > >::iterator i = - waiting_for_ondisk.begin(); - i != waiting_for_ondisk.end(); - ++i) { - for (list >::iterator j = - i->second.begin(); - j != i->second.end(); - ++j) { - derr << __func__ << ": op " << *(j->first->get_req()) << " waiting on " - << i->first << dendl; - } + for (auto& i : waiting_for_ondisk) { + for (auto& j : i.second) { + derr << __func__ << ": op " << *(std::get<0>(j)->get_req()) + << " waiting on " << i.first << dendl; + } } ceph_assert(waiting_for_ondisk.empty()); } diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc index 4a777934af5..98d454c2238 100644 --- a/src/test/librados/tier.cc +++ b/src/test/librados/tier.cc @@ -6420,3 +6420,55 @@ TEST_F(LibRadosTwoPoolsECPP, ManifestPromoteRead) { // wait for maps to settle before next test cluster.wait_for_latest_osdmap(); } + +TEST_F(LibRadosTwoPoolsPP, PropagateBaseTierError) { + // write object to base tier + bufferlist omap_bl; + encode(static_cast(0U), omap_bl); + + ObjectWriteOperation op1; + op1.omap_set({{"somekey", omap_bl}}); + ASSERT_EQ(0, ioctx.operate("propagate-base-tier-error", &op1)); + + // configure cache + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name + + "\", \"tierpool\": \"" + cache_pool_name + + "\", \"force_nonempty\": \"--force-nonempty\" }", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name + + "\", \"mode\": \"writeback\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name + + "\", \"overlaypool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_type", "bloom"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_count", 1), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_period", 600), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "target_max_objects", 250), + inbl, NULL, NULL)); + + // wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // guarded op should fail so expect error to propagate to cache tier + bufferlist test_omap_bl; + encode(static_cast(1U), test_omap_bl); + + ObjectWriteOperation op2; + op2.omap_cmp({{"somekey", {test_omap_bl, CEPH_OSD_CMPXATTR_OP_EQ}}}, nullptr); + op2.omap_set({{"somekey", test_omap_bl}}); + + ASSERT_EQ(-ECANCELED, ioctx.operate("propagate-base-tier-error", &op2)); +} -- cgit v1.2.1 From abba0913528d5fa107497d24ddd3591dd93b808c Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 18 Oct 2018 14:07:36 -0400 Subject: include/types: fixed compile warning for signed/unsigned comparison Signed-off-by: Jason Dillaman --- src/include/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/types.h b/src/include/types.h index 73123bc9733..8103cabbb36 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -158,7 +158,7 @@ inline std::ostream& operator<<(std::ostream& out, const std::deque& v) template inline std::ostream& operator<<(std::ostream& out, const std::tuple &t) { - auto f = [n = sizeof...(Ts), i = 0, &out](const auto& e) mutable { + auto f = [n = sizeof...(Ts), i = 0U, &out](const auto& e) mutable { out << e; if (++i != n) out << ","; -- cgit v1.2.1