summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Dillaman <dillaman@redhat.com>2018-10-18 14:04:12 -0400
committerJason Dillaman <dillaman@redhat.com>2018-10-29 10:48:45 -0400
commiteed3163f2916446008cfdb1fc119b42d7a2ed943 (patch)
treef206d3b804b8f5d6aeaaa6acfb08563034912ec7
parent45b3cedb487330822b2251e34c74a3558167cdf8 (diff)
downloadceph-eed3163f2916446008cfdb1fc119b42d7a2ed943.tar.gz
osd/PrimaryLogPG: uncommitted dup ops should respond with logged return code
Fixes: http://tracker.ceph.com/issues/36408 Signed-off-by: Jason Dillaman <dillaman@redhat.com>
-rw-r--r--src/osd/PG.h2
-rw-r--r--src/osd/PrimaryLogPG.cc41
-rw-r--r--src/test/librados/tier.cc52
3 files changed, 69 insertions, 26 deletions
diff --git a/src/osd/PG.h b/src/osd/PG.h
index f695f6c5e9d..5d9b48b8b50 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1283,7 +1283,7 @@ protected:
map<hobject_t, list<Context*>> callbacks_for_degraded_object;
map<eversion_t,
- list<pair<OpRequestRef, version_t> > > waiting_for_ondisk;
+ list<tuple<OpRequestRef, version_t, int> > > waiting_for_ondisk;
void requeue_object_waiters(map<hobject_t, list<OpRequestRef>>& m);
void requeue_op(OpRequestRef op);
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 875c7ad4e7a..8cd65df20f6 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -2167,7 +2167,7 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
} else {
dout(10) << " waiting for " << version << " to commit" << dendl;
// always queue ondisk waiters, so that we can requeue if needed
- waiting_for_ondisk[version].push_back(make_pair(op, user_version));
+ waiting_for_ondisk[version].emplace_back(op, user_version, return_code);
op->mark_delayed("waiting for ondisk");
}
return;
@@ -10371,12 +10371,13 @@ void PrimaryLogPG::eval_repop(RepGather *repop)
auto it = waiting_for_ondisk.find(repop->v);
if (it != waiting_for_ondisk.end()) {
ceph_assert(waiting_for_ondisk.begin()->first == repop->v);
- for (list<pair<OpRequestRef, version_t> >::iterator i =
- it->second.begin();
- i != it->second.end();
- ++i) {
- osd->reply_op_error(i->first, repop->r, repop->v,
- i->second);
+ for (auto& i : it->second) {
+ int return_code = repop->r;
+ if (return_code >= 0) {
+ return_code = std::get<2>(i);
+ }
+ osd->reply_op_error(std::get<0>(i), return_code, repop->v,
+ std::get<1>(i));
}
waiting_for_ondisk.erase(it);
}
@@ -11885,15 +11886,11 @@ void PrimaryLogPG::apply_and_flush_repops(bool requeue)
}
// also requeue any dups, interleaved into position
- map<eversion_t, list<pair<OpRequestRef, version_t> > >::iterator p =
- waiting_for_ondisk.find(repop->v);
+ auto p = waiting_for_ondisk.find(repop->v);
if (p != waiting_for_ondisk.end()) {
dout(10) << " also requeuing ondisk waiters " << p->second << dendl;
- for (list<pair<OpRequestRef, version_t> >::iterator i =
- p->second.begin();
- i != p->second.end();
- ++i) {
- rq.push_back(i->first);
+ for (auto& i : p->second) {
+ rq.push_back(std::get<0>(i));
}
waiting_for_ondisk.erase(p);
}
@@ -11907,17 +11904,11 @@ void PrimaryLogPG::apply_and_flush_repops(bool requeue)
if (requeue) {
requeue_ops(rq);
if (!waiting_for_ondisk.empty()) {
- for (map<eversion_t, list<pair<OpRequestRef, version_t> > >::iterator i =
- waiting_for_ondisk.begin();
- i != waiting_for_ondisk.end();
- ++i) {
- for (list<pair<OpRequestRef, version_t> >::iterator j =
- i->second.begin();
- j != i->second.end();
- ++j) {
- derr << __func__ << ": op " << *(j->first->get_req()) << " waiting on "
- << i->first << dendl;
- }
+ for (auto& i : waiting_for_ondisk) {
+ for (auto& j : i.second) {
+ derr << __func__ << ": op " << *(std::get<0>(j)->get_req())
+ << " waiting on " << i.first << dendl;
+ }
}
ceph_assert(waiting_for_ondisk.empty());
}
diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc
index 4a777934af5..98d454c2238 100644
--- a/src/test/librados/tier.cc
+++ b/src/test/librados/tier.cc
@@ -6420,3 +6420,55 @@ TEST_F(LibRadosTwoPoolsECPP, ManifestPromoteRead) {
// wait for maps to settle before next test
cluster.wait_for_latest_osdmap();
}
+
+TEST_F(LibRadosTwoPoolsPP, PropagateBaseTierError) {
+ // write object to base tier
+ bufferlist omap_bl;
+ encode(static_cast<uint32_t>(0U), omap_bl);
+
+ ObjectWriteOperation op1;
+ op1.omap_set({{"somekey", omap_bl}});
+ ASSERT_EQ(0, ioctx.operate("propagate-base-tier-error", &op1));
+
+ // configure cache
+ bufferlist inbl;
+ ASSERT_EQ(0, cluster.mon_command(
+ "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
+ "\", \"tierpool\": \"" + cache_pool_name +
+ "\", \"force_nonempty\": \"--force-nonempty\" }",
+ inbl, NULL, NULL));
+ ASSERT_EQ(0, cluster.mon_command(
+ "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
+ "\", \"mode\": \"writeback\"}",
+ inbl, NULL, NULL));
+ ASSERT_EQ(0, cluster.mon_command(
+ "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
+ "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
+ inbl, NULL, NULL));
+
+ ASSERT_EQ(0, cluster.mon_command(
+ set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
+ inbl, NULL, NULL));
+ ASSERT_EQ(0, cluster.mon_command(
+ set_pool_str(cache_pool_name, "hit_set_count", 1),
+ inbl, NULL, NULL));
+ ASSERT_EQ(0, cluster.mon_command(
+ set_pool_str(cache_pool_name, "hit_set_period", 600),
+ inbl, NULL, NULL));
+ ASSERT_EQ(0, cluster.mon_command(
+ set_pool_str(cache_pool_name, "target_max_objects", 250),
+ inbl, NULL, NULL));
+
+ // wait for maps to settle
+ cluster.wait_for_latest_osdmap();
+
+ // guarded op should fail so expect error to propagate to cache tier
+ bufferlist test_omap_bl;
+ encode(static_cast<uint32_t>(1U), test_omap_bl);
+
+ ObjectWriteOperation op2;
+ op2.omap_cmp({{"somekey", {test_omap_bl, CEPH_OSD_CMPXATTR_OP_EQ}}}, nullptr);
+ op2.omap_set({{"somekey", test_omap_bl}});
+
+ ASSERT_EQ(-ECANCELED, ioctx.operate("propagate-base-tier-error", &op2));
+}