summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Just <sam.just@inktank.com>2013-09-03 15:39:18 -0700
committerSamuel Just <sam.just@inktank.com>2013-09-23 22:54:58 -0700
commitfadb429555d934516d4ee075245d7b7eb76643b6 (patch)
tree062c1c97c2adbce1442a029e0adae8cf6c320567
parent295d74ba83f6d7462794d5e94730417ee641fc18 (diff)
downloadceph-fadb429555d934516d4ee075245d7b7eb76643b6.tar.gz
ReplicatedPG/Backend: split recover_missing out of prepare_pull
Also, move prepare_pull to PGBackend. Signed-off-by: Samuel Just <sam.just@inktank.com>
-rw-r--r--src/common/hobject.h21
-rw-r--r--src/osd/ReplicatedBackend.h11
-rw-r--r--src/osd/ReplicatedPG.cc243
-rw-r--r--src/osd/ReplicatedPG.h13
4 files changed, 180 insertions, 108 deletions
diff --git a/src/common/hobject.h b/src/common/hobject.h
index 4b6a33c6697..a2e7e5a9215 100644
--- a/src/common/hobject.h
+++ b/src/common/hobject.h
@@ -79,9 +79,28 @@ public:
return ret;
}
+ /// @return head version of this hobject_t
+ hobject_t get_head() const {
+ hobject_t ret(*this);
+ ret.snap = CEPH_NOSNAP;
+ return ret;
+ }
+
+ /// @return snapdir version of this hobject_t
+ hobject_t get_snapdir() const {
+ hobject_t ret(*this);
+ ret.snap = CEPH_SNAPDIR;
+ return ret;
+ }
+
+ /// @return true if object is neither head nor snapdir
+ bool is_snap() const {
+ return (snap != CEPH_NOSNAP) && (snap != CEPH_SNAPDIR);
+ }
+
/// @return true iff the object should have a snapset in it's attrs
bool has_snapset() const {
- return (snap == CEPH_NOSNAP) || (snap == CEPH_SNAPDIR);
+ return !is_snap();
}
/* Do not use when a particular hash function is needed */
diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h
index 3115afab956..e703d4c333c 100644
--- a/src/osd/ReplicatedBackend.h
+++ b/src/osd/ReplicatedBackend.h
@@ -256,6 +256,17 @@ private:
ObjectStore::Transaction *t);
void submit_push_complete(ObjectRecoveryInfo &recovery_info,
ObjectStore::Transaction *t);
+
+ void calc_clone_subsets(
+ SnapSet& snapset, const hobject_t& poid, const pg_missing_t& missing,
+ const hobject_t &last_backfill,
+ interval_set<uint64_t>& data_subset,
+ map<hobject_t, interval_set<uint64_t> >& clone_subsets);
+ void prepare_pull(
+ const hobject_t& soid,
+ ObjectContextRef headctx,
+ int priority,
+ RPGHandle *h);
};
#endif
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 1f287d4fc9a..b3f5324059d 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -277,10 +277,9 @@ void ReplicatedPG::wait_for_missing_object(const hobject_t& soid, OpRequestRef o
}
else {
dout(7) << "missing " << soid << " v " << v << ", pulling." << dendl;
- map<int, vector<PullOp> > pulls;
- prepare_pull(soid, v, cct->_conf->osd_client_op_priority, &pulls);
- // TODOSAM: replace
- //send_pulls(g_conf->osd_client_op_priority, pulls);
+ PGBackend::RecoveryHandle *h = pgbackend->open_recovery_op();
+ recover_missing(soid, v, cct->_conf->osd_client_op_priority, h);
+ pgbackend->run_recovery_op(h, cct->_conf->osd_client_op_priority);
}
waiting_for_missing_object[soid].push_back(op);
op->mark_delayed("waiting for missing object");
@@ -5206,13 +5205,11 @@ ObjectContextRef ReplicatedPG::get_object_context(const hobject_t& soid,
obc->obs.oi = oi;
obc->obs.exists = true;
- if (can_create) {
- obc->ssc = get_snapset_context(
- soid.oid, soid.get_key(), soid.hash,
- true, soid.get_namespace(),
- soid.has_snapset() ? attrs : 0);
- register_snapset_context(obc->ssc);
- }
+ obc->ssc = get_snapset_context(
+ soid.oid, soid.get_key(), soid.hash,
+ true, soid.get_namespace(),
+ soid.has_snapset() ? attrs : 0);
+ register_snapset_context(obc->ssc);
populate_obc_watchers(obc);
dout(10) << "get_object_context " << obc << " " << soid << " 0 -> 1 read " << obc->obs.oi << dendl;
@@ -5765,11 +5762,12 @@ void ReplicatedPG::calc_head_subsets(ObjectContextRef obc, SnapSet& snapset, con
<< " clone_subsets " << clone_subsets << dendl;
}
-void ReplicatedPG::calc_clone_subsets(SnapSet& snapset, const hobject_t& soid,
- const pg_missing_t& missing,
- const hobject_t &last_backfill,
- interval_set<uint64_t>& data_subset,
- map<hobject_t, interval_set<uint64_t> >& clone_subsets)
+void ReplicatedBackend::calc_clone_subsets(
+ SnapSet& snapset, const hobject_t& soid,
+ const pg_missing_t& missing,
+ const hobject_t &last_backfill,
+ interval_set<uint64_t>& data_subset,
+ map<hobject_t, interval_set<uint64_t> >& clone_subsets)
{
dout(10) << "calc_clone_subsets " << soid
<< " clone_overlap " << snapset.clone_overlap << dendl;
@@ -5854,95 +5852,70 @@ void ReplicatedPG::calc_clone_subsets(SnapSet& snapset, const hobject_t& soid,
*/
enum { PULL_NONE, PULL_OTHER, PULL_YES };
-int ReplicatedPG::prepare_pull(
- const hobject_t& soid, eversion_t v,
+void ReplicatedBackend::prepare_pull(
+ const hobject_t& soid,
+ ObjectContextRef headctx,
int priority,
- map<int, vector<PullOp> > *pulls)
-{
+ RPGHandle *h)
+{
+ assert(get_parent()->get_local_missing().missing.count(soid));
+ eversion_t v = get_parent()->get_local_missing().missing.find(
+ soid)->second.need;
+ const map<hobject_t, set<int> > &missing_loc(
+ get_parent()->get_missing_loc());
+ const map<int, pg_missing_t > &peer_missing(
+ get_parent()->get_peer_missing());
int fromosd = -1;
- map<hobject_t,set<int> >::iterator q = missing_loc.find(soid);
- if (q != missing_loc.end()) {
- // randomize the list of possible sources
- // should we take weights into account?
- vector<int> shuffle(q->second.begin(), q->second.end());
- random_shuffle(shuffle.begin(), shuffle.end());
- for (vector<int>::iterator p = shuffle.begin();
- p != shuffle.end();
- ++p) {
- if (get_osdmap()->is_up(*p)) {
- fromosd = *p;
- break;
- }
- }
- }
- if (fromosd < 0) {
- dout(7) << "pull " << soid
- << " v " << v
- << " but it is unfound" << dendl;
- return PULL_NONE;
- }
+ map<hobject_t,set<int> >::const_iterator q = missing_loc.find(soid);
+ assert(q != missing_loc.end());
+ assert(!q->second.empty());
+
+ // pick a pullee
+ vector<int> shuffle(q->second.begin(), q->second.end());
+ random_shuffle(shuffle.begin(), shuffle.end());
+ vector<int>::iterator p = shuffle.begin();
+ assert(get_osdmap()->is_up(*p));
+ fromosd = *p;
+ assert(fromosd >= 0);
+
+ dout(7) << "pull " << soid
+ << "v " << v
+ << " on osds " << *p
+ << " from osd." << fromosd
+ << dendl;
assert(peer_missing.count(fromosd));
- if (peer_missing[fromosd].is_missing(soid, v)) {
- assert(peer_missing[fromosd].missing[soid].have != v);
+ const pg_missing_t &pmissing = peer_missing.find(fromosd)->second;
+ if (pmissing.is_missing(soid, v)) {
+ assert(pmissing.missing.find(soid)->second.have != v);
dout(10) << "pulling soid " << soid << " from osd " << fromosd
- << " at version " << peer_missing[fromosd].missing[soid].have
+ << " at version " << pmissing.missing.find(soid)->second.have
<< " rather than at version " << v << dendl;
- v = peer_missing[fromosd].missing[soid].have;
- assert(pg_log.get_log().objects.count(soid) &&
- pg_log.get_log().objects.find(soid)->second->op == pg_log_entry_t::LOST_REVERT &&
- pg_log.get_log().objects.find(soid)->second->reverting_to == v);
+ v = pmissing.missing.find(soid)->second.have;
+ assert(get_parent()->get_log().get_log().objects.count(soid) &&
+ (get_parent()->get_log().get_log().objects.find(soid)->second->op ==
+ pg_log_entry_t::LOST_REVERT) &&
+ (get_parent()->get_log().get_log().objects.find(
+ soid)->second->reverting_to ==
+ v));
}
- dout(7) << "pull " << soid
- << " v " << v
- << " on osds " << missing_loc[soid]
- << " from osd." << fromosd
- << dendl;
-
ObjectRecoveryInfo recovery_info;
- // is this a snapped object? if so, consult the snapset.. we may not need the entire object!
- if (soid.snap && soid.snap < CEPH_NOSNAP) {
- // do we have the head and/or snapdir?
- hobject_t head = soid;
- head.snap = CEPH_NOSNAP;
- if (pg_log.get_missing().is_missing(head)) {
- if (pulling.count(head)) {
- dout(10) << " missing but already pulling head " << head << dendl;
- return PULL_NONE;
- } else {
- int r = prepare_pull(
- head, pg_log.get_missing().missing.find(head)->second.need, priority,
- pulls);
- if (r != PULL_NONE)
- return PULL_OTHER;
- return PULL_NONE;
- }
- }
- head.snap = CEPH_SNAPDIR;
- if (pg_log.get_missing().is_missing(head)) {
- if (pulling.count(head)) {
- dout(10) << " missing but already pulling snapdir " << head << dendl;
- return PULL_NONE;
- } else {
- int r = prepare_pull(
- head, pg_log.get_missing().missing.find(head)->second.need, priority,
- pulls);
- if (r != PULL_NONE)
- return PULL_OTHER;
- return PULL_NONE;
- }
- }
-
+ if (soid.is_snap()) {
+ assert(!get_parent()->get_local_missing().is_missing(
+ soid.get_head()) ||
+ !get_parent()->get_local_missing().is_missing(
+ soid.get_snapdir()));
+ assert(headctx);
// check snapset
- SnapSetContext *ssc = get_snapset_context(soid.oid, soid.get_key(), soid.hash, false, soid.get_namespace());
+ SnapSetContext *ssc = headctx->ssc;
assert(ssc);
dout(10) << " snapset " << ssc->snapset << dendl;
- calc_clone_subsets(ssc->snapset, soid, pg_log.get_missing(), info.last_backfill,
+ calc_clone_subsets(ssc->snapset, soid, get_parent()->get_local_missing(),
+ get_info().last_backfill,
recovery_info.copy_subset,
recovery_info.clone_subset);
- put_snapset_context(ssc);
// FIXME: this may overestimate if we are pulling multiple clones in parallel...
dout(10) << " pulling " << recovery_info << dendl;
} else {
@@ -5952,8 +5925,8 @@ int ReplicatedPG::prepare_pull(
recovery_info.size = ((uint64_t)-1);
}
- (*pulls)[fromosd].push_back(PullOp());
- PullOp &op = (*pulls)[fromosd].back();
+ h->pulls[fromosd].push_back(PullOp());
+ PullOp &op = h->pulls[fromosd].back();
op.soid = soid;
op.recovery_info = recovery_info;
@@ -5971,7 +5944,74 @@ int ReplicatedPG::prepare_pull(
pi.recovery_progress = op.recovery_progress;
pi.priority = priority;
+ // TODOSAM: do something??
+}
+
+int ReplicatedPG::recover_missing(
+ const hobject_t &soid, eversion_t v,
+ int priority,
+ PGBackend::RecoveryHandle *h)
+{
+ map<hobject_t,set<int> >::iterator q = missing_loc.find(soid);
+ if (q == missing_loc.end()) {
+ dout(7) << "pull " << soid
+ << " v " << v
+ << " but it is unfound" << dendl;
+ return PULL_NONE;
+ }
+
+ // is this a snapped object? if so, consult the snapset.. we may not need the entire object!
+ ObjectContextRef obc;
+ ObjectContextRef head_obc;
+ if (soid.snap && soid.snap < CEPH_NOSNAP) {
+ // do we have the head and/or snapdir?
+ hobject_t head = soid.get_head();
+ if (pg_log.get_missing().is_missing(head)) {
+ if (recovering.count(head)) {
+ dout(10) << " missing but already recovering head " << head << dendl;
+ return PULL_NONE;
+ } else {
+ int r = recover_missing(
+ head, pg_log.get_missing().missing.find(head)->second.need, priority,
+ h);
+ if (r != PULL_NONE)
+ return PULL_OTHER;
+ return PULL_NONE;
+ }
+ }
+ head = soid.get_snapdir();
+ if (pg_log.get_missing().is_missing(head)) {
+ if (recovering.count(head)) {
+ dout(10) << " missing but already recovering snapdir " << head << dendl;
+ return PULL_NONE;
+ } else {
+ int r = recover_missing(
+ head, pg_log.get_missing().missing.find(head)->second.need, priority,
+ h);
+ if (r != PULL_NONE)
+ return PULL_OTHER;
+ return PULL_NONE;
+ }
+ }
+
+ // we must have one or the other
+ head_obc = get_object_context(
+ soid.get_head(),
+ false,
+ 0);
+ if (!head_obc)
+ head_obc = get_object_context(
+ soid.get_snapdir(),
+ false,
+ 0);
+ assert(head_obc);
+ }
start_recovery_op(soid);
+ pgbackend->recover_object(
+ soid,
+ head_obc,
+ obc,
+ h);
return PULL_YES;
}
@@ -6030,9 +6070,12 @@ void ReplicatedPG::prep_push_to_replica(
SnapSetContext *ssc = get_snapset_context(soid.oid, soid.get_key(), soid.hash, false, soid.get_namespace());
assert(ssc);
dout(15) << "push_to_replica snapset is " << ssc->snapset << dendl;
+// TODOSAM: fix
+#if 0
calc_clone_subsets(ssc->snapset, soid, peer_missing[peer],
peer_info[peer].last_backfill,
data_subset, clone_subsets);
+#endif
put_snapset_context(ssc);
} else if (soid.snap == CEPH_NOSNAP) {
// pushing head or unversioned object.
@@ -6219,8 +6262,11 @@ ObjectRecoveryInfo ReplicatedPG::recalc_subsets(const ObjectRecoveryInfo& recove
new_info.copy_subset.clear();
new_info.clone_subset.clear();
assert(ssc);
+// TODOSAM: fix
+#if 0
calc_clone_subsets(ssc->snapset, new_info.soid, pg_log.get_missing(), info.last_backfill,
new_info.copy_subset, new_info.clone_subset);
+#endif
put_snapset_context(ssc);
return new_info;
}
@@ -7563,7 +7609,7 @@ int ReplicatedPG::recover_primary(int max, ThreadPool::TPHandle &handle)
int started = 0;
int skipped = 0;
- map<int, vector<PullOp> > pulls;
+ PGBackend::RecoveryHandle *h = pgbackend->open_recovery_op();
map<version_t, hobject_t>::const_iterator p =
missing.rmissing.lower_bound(pg_log.get_log().last_requested);
while (p != missing.rmissing.end()) {
@@ -7670,14 +7716,14 @@ int ReplicatedPG::recover_primary(int max, ThreadPool::TPHandle &handle)
}
}
- if (!pulling.count(soid)) {
- if (pulling.count(head)) {
+ if (!recovering.count(soid)) {
+ if (recovering.count(head)) {
++skipped;
} else if (unfound) {
++skipped;
} else {
- int r = prepare_pull(
- soid, need, cct->_conf->osd_recovery_op_priority, &pulls);
+ int r = recover_missing(
+ soid, need, cct->_conf->osd_recovery_op_priority, h);
switch (r) {
case PULL_YES:
++started;
@@ -7700,8 +7746,7 @@ int ReplicatedPG::recover_primary(int max, ThreadPool::TPHandle &handle)
pg_log.set_last_requested(v);
}
- // TODOSAM: replace
- //send_pulls(g_conf->osd_recovery_op_priority, pulls);
+ pgbackend->run_recovery_op(h, cct->_conf->osd_recovery_op_priority);
return started;
}
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 22cdfb0b8a7..557fc820877 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -543,6 +543,7 @@ protected:
}
};
map<hobject_t, PullInfo> pulling;
+ set<hobject_t> recovering;
ObjectRecoveryInfo recalc_subsets(const ObjectRecoveryInfo& recovery_info);
@@ -631,10 +632,6 @@ protected:
const hobject_t &last_backfill,
interval_set<uint64_t>& data_subset,
map<hobject_t, interval_set<uint64_t> >& clone_subsets);
- void calc_clone_subsets(SnapSet& snapset, const hobject_t& poid, const pg_missing_t& missing,
- const hobject_t &last_backfill,
- interval_set<uint64_t>& data_subset,
- map<hobject_t, interval_set<uint64_t> >& clone_subsets);
void prep_push_to_replica(
ObjectContextRef obc,
const hobject_t& oid,
@@ -658,11 +655,11 @@ protected:
// Cancels/resets pulls from peer
void check_recovery_sources(const OSDMapRef map);
- int prepare_pull(
- const hobject_t& oid, eversion_t v,
+ int recover_missing(
+ const hobject_t& oid,
+ eversion_t v,
int priority,
- map<int, vector<PullOp> > *pulls
- );
+ PGBackend::RecoveryHandle *h);
// low level ops