summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/messages/MOSDPGInfo.h2
-rw-r--r--src/messages/MOSDPGLog.h2
-rw-r--r--src/messages/MOSDPGMissing.h4
-rw-r--r--src/messages/MOSDPGNotify.h2
-rw-r--r--src/osd/OSD.cc4
-rw-r--r--src/osd/PG.cc145
-rw-r--r--src/osd/PG.h93
-rw-r--r--src/osd/ReplicatedPG.cc22
-rw-r--r--src/osd/ReplicatedPG.h4
-rw-r--r--src/osd/osd_types.cc172
-rw-r--r--src/osd/osd_types.h80
-rw-r--r--src/test/encoding/types.h6
12 files changed, 294 insertions, 242 deletions
diff --git a/src/messages/MOSDPGInfo.h b/src/messages/MOSDPGInfo.h
index 7e8fb57274b..2dc6ebb9961 100644
--- a/src/messages/MOSDPGInfo.h
+++ b/src/messages/MOSDPGInfo.h
@@ -17,7 +17,7 @@
#define CEPH_MOSDPGINFO_H
#include "msg/Message.h"
-#include "osd/PG.h"
+#include "osd/osd_types.h"
class MOSDPGInfo : public Message {
epoch_t epoch;
diff --git a/src/messages/MOSDPGLog.h b/src/messages/MOSDPGLog.h
index e71a0000d45..41aca565c53 100644
--- a/src/messages/MOSDPGLog.h
+++ b/src/messages/MOSDPGLog.h
@@ -29,7 +29,7 @@ class MOSDPGLog : public Message {
public:
pg_info_t info;
pg_log_t log;
- PG::Missing missing;
+ pg_missing_t missing;
epoch_t get_epoch() { return epoch; }
pg_t get_pgid() { return info.pgid; }
diff --git a/src/messages/MOSDPGMissing.h b/src/messages/MOSDPGMissing.h
index 55debebf30f..20f61a4c720 100644
--- a/src/messages/MOSDPGMissing.h
+++ b/src/messages/MOSDPGMissing.h
@@ -23,13 +23,13 @@ class MOSDPGMissing : public Message {
public:
pg_info_t info;
- PG::Missing missing;
+ pg_missing_t missing;
epoch_t get_epoch() { return epoch; }
MOSDPGMissing() {}
MOSDPGMissing(version_t mv, const pg_info_t &info_,
- const PG::Missing &missing_)
+ const pg_missing_t &missing_)
: Message(MSG_OSD_PG_MISSING), epoch(mv), info(info_),
missing(missing_) { }
private:
diff --git a/src/messages/MOSDPGNotify.h b/src/messages/MOSDPGNotify.h
index 9c022d2443b..9c5306ff9c8 100644
--- a/src/messages/MOSDPGNotify.h
+++ b/src/messages/MOSDPGNotify.h
@@ -17,7 +17,7 @@
#include "msg/Message.h"
-#include "osd/PG.h"
+#include "osd/osd_types.h"
/*
* PGNotify - notify primary of my PGs and versions.
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 137377b4036..5727928b186 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2453,8 +2453,8 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
pg->lock();
fout << *pg << std::endl;
- std::map<hobject_t, PG::Missing::item>::iterator mend = pg->missing.missing.end();
- std::map<hobject_t, PG::Missing::item>::iterator mi = pg->missing.missing.begin();
+ std::map<hobject_t, pg_missing_t::item>::iterator mend = pg->missing.missing.end();
+ std::map<hobject_t, pg_missing_t::item>::iterator mi = pg->missing.missing.begin();
for (; mi != mend; ++mi) {
fout << mi->first << " -> " << mi->second << std::endl;
map<hobject_t, set<int> >::const_iterator mli =
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index abb10e35b04..6057a458f2d 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -96,7 +96,7 @@ void PG::IndexedLog::trim(ObjectStore::Transaction& t, eversion_t s)
/********* PG **********/
-void PG::proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, Missing& omissing, int from)
+void PG::proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, pg_missing_t& omissing, int from)
{
dout(10) << "proc_master_log for osd." << from << ": " << olog << " " << omissing << dendl;
assert(!is_active() && is_primary());
@@ -115,7 +115,7 @@ void PG::proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t
}
void PG::proc_replica_log(ObjectStore::Transaction& t,
- pg_info_t &oinfo, pg_log_t &olog, Missing& omissing, int from)
+ pg_info_t &oinfo, pg_log_t &olog, pg_missing_t& omissing, int from)
{
dout(10) << "proc_replica_log for osd." << from << ": "
<< oinfo << " " << olog << " " << omissing << dendl;
@@ -130,7 +130,7 @@ void PG::proc_replica_log(ObjectStore::Transaction& t,
we will send the peer enough log to arrive at the same state.
*/
- for (map<hobject_t, Missing::item>::iterator i = omissing.missing.begin();
+ for (map<hobject_t, pg_missing_t::item>::iterator i = omissing.missing.begin();
i != omissing.missing.end();
++i) {
dout(20) << " before missing " << i->first << " need " << i->second.need
@@ -211,7 +211,7 @@ void PG::proc_replica_log(ObjectStore::Transaction& t,
might_have_unfound.insert(from);
search_for_missing(oinfo, &omissing, from);
- for (map<hobject_t, Missing::item>::iterator i = omissing.missing.begin();
+ for (map<hobject_t, pg_missing_t::item>::iterator i = omissing.missing.begin();
i != omissing.missing.end();
++i) {
dout(20) << " after missing " << i->first << " need " << i->second.need
@@ -322,10 +322,10 @@ void PG::merge_log(ObjectStore::Transaction& t,
// The logs must overlap.
assert(log.head >= olog.tail && olog.head >= log.tail);
- for (map<hobject_t, Missing::item>::iterator i = missing.missing.begin();
+ for (map<hobject_t, pg_missing_t::item>::iterator i = missing.missing.begin();
i != missing.missing.end();
++i) {
- dout(20) << "Missing sobject: " << i->first << dendl;
+ dout(20) << "pg_missing_t sobject: " << i->first << dendl;
}
bool changed = false;
@@ -446,7 +446,7 @@ void PG::merge_log(ObjectStore::Transaction& t,
* TODO: if the missing set becomes very large, this could get expensive.
* Instead, we probably want to just iterate over our unfound set.
*/
-bool PG::search_for_missing(const pg_info_t &oinfo, const Missing *omissing,
+bool PG::search_for_missing(const pg_info_t &oinfo, const pg_missing_t *omissing,
int fromosd)
{
bool stats_updated = false;
@@ -457,7 +457,7 @@ bool PG::search_for_missing(const pg_info_t &oinfo, const Missing *omissing,
peer_missing[fromosd];
// found items?
- for (map<hobject_t,Missing::item>::iterator p = missing.missing.begin();
+ for (map<hobject_t,pg_missing_t::item>::iterator p = missing.missing.begin();
p != missing.missing.end();
++p) {
const hobject_t &soid(p->first);
@@ -539,12 +539,12 @@ void PG::discover_all_missing(map< int, map<pg_t,pg_query_t> > &query_map)
continue;
}
- // If we've requested any of this stuff, the Missing information
+ // If we've requested any of this stuff, the pg_missing_t information
// should be on its way.
// TODO: coalsce requested_* into a single data structure
if (peer_missing.find(peer) != peer_missing.end()) {
dout(20) << __func__ << ": osd." << peer
- << ": we already have Missing" << dendl;
+ << ": we already have pg_missing_t" << dendl;
continue;
}
if (peer_log_requested.find(peer) != peer_log_requested.end()) {
@@ -559,7 +559,7 @@ void PG::discover_all_missing(map< int, map<pg_t,pg_query_t> > &query_map)
}
// Request missing
- dout(10) << __func__ << ": osd." << peer << ": requesting Missing"
+ dout(10) << __func__ << ": osd." << peer << ": requesting pg_missing_t"
<< dendl;
peer_missing_requested.insert(peer);
query_map[peer][info.pgid] =
@@ -608,7 +608,7 @@ bool PG::is_all_uptodate() const
++a;
for (; a != end; ++a) {
int peer = *a;
- map<int, Missing>::const_iterator pm = peer_missing.find(peer);
+ map<int, pg_missing_t>::const_iterator pm = peer_missing.find(peer);
if (pm == peer_missing.end()) {
dout(10) << __func__ << ": osd." << peer << " don't have missing set" << dendl;
uptodate = false;
@@ -1052,7 +1052,7 @@ bool PG::choose_acting(int& newest_update_osd)
* This is used by the primary OSD during recovery.
*
* This set tracks the OSDs which might have unfound objects that the primary
- * OSD needs. As we receive Missing from each OSD in might_have_unfound, we
+ * OSD needs. As we receive pg_missing_t from each OSD in might_have_unfound, we
* will remove the OSD from the set.
*/
void PG::build_might_have_unfound()
@@ -1216,7 +1216,7 @@ void PG::activate(ObjectStore::Transaction& t, list<Context*>& tfin,
dout(10) << "activate peer osd." << peer << " " << pi << dendl;
MOSDPGLog *m = 0;
- Missing& pm = peer_missing[peer];
+ pg_missing_t& pm = peer_missing[peer];
if (pi.last_update == info.last_update) {
// empty log
@@ -3037,7 +3037,7 @@ void PG::share_pg_log()
vector<int>::const_iterator end = acting.end();
while (++a != end) {
int peer(*a);
- PG::Missing& pmissing(peer_missing[peer]);
+ pg_missing_t& pmissing(peer_missing[peer]);
pg_info_t& pinfo(peer_info[peer]);
MOSDPGLog *m = new MOSDPGLog(info.last_update.epoch, info);
@@ -3392,121 +3392,6 @@ void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo)
}
}
-unsigned int PG::Missing::num_missing() const
-{
- return missing.size();
-}
-
-bool PG::Missing::have_missing() const
-{
- return !missing.empty();
-}
-
-void PG::Missing::swap(Missing& o)
-{
- missing.swap(o.missing);
- rmissing.swap(o.rmissing);
-}
-
-bool PG::Missing::is_missing(const hobject_t& oid) const
-{
- return (missing.find(oid) != missing.end());
-}
-
-bool PG::Missing::is_missing(const hobject_t& oid, eversion_t v) const
-{
- map<hobject_t, item>::const_iterator m = missing.find(oid);
- if (m == missing.end())
- return false;
- const Missing::item &item(m->second);
- if (item.need > v)
- return false;
- return true;
-}
-
-eversion_t PG::Missing::have_old(const hobject_t& oid) const
-{
- map<hobject_t, item>::const_iterator m = missing.find(oid);
- if (m == missing.end())
- return eversion_t();
- const Missing::item &item(m->second);
- return item.have;
-}
-
-/*
- * this needs to be called in log order as we extend the log. it
- * assumes missing is accurate up through the previous log entry.
- */
-void PG::Missing::add_next_event(const pg_log_entry_t& e)
-{
- if (e.is_update()) {
- if (e.prior_version == eversion_t() || e.is_clone()) {
- // new object.
- //assert(missing.count(e.soid) == 0); // might already be missing divergent item.
- if (missing.count(e.soid)) // already missing divergent item
- rmissing.erase(missing[e.soid].need.version);
- missing[e.soid] = item(e.version, eversion_t()); // .have = nil
- } else if (missing.count(e.soid)) {
- // already missing (prior).
- //assert(missing[e.soid].need == e.prior_version);
- rmissing.erase(missing[e.soid].need.version);
- missing[e.soid].need = e.version; // leave .have unchanged.
- } else if (e.is_backlog()) {
- // May not have prior version
- assert(0 == "these don't exist anymore");
- } else {
- // not missing, we must have prior_version (if any)
- missing[e.soid] = item(e.version, e.prior_version);
- }
- rmissing[e.version.version] = e.soid;
- } else
- rm(e.soid, e.version);
-}
-
-void PG::Missing::revise_need(hobject_t oid, eversion_t need)
-{
- if (missing.count(oid)) {
- rmissing.erase(missing[oid].need.version);
- missing[oid].need = need; // no not adjust .have
- } else {
- missing[oid] = item(need, eversion_t());
- }
- rmissing[need.version] = oid;
-}
-
-void PG::Missing::add(const hobject_t& oid, eversion_t need, eversion_t have)
-{
- missing[oid] = item(need, have);
- rmissing[need.version] = oid;
-}
-
-void PG::Missing::rm(const hobject_t& oid, eversion_t v)
-{
- std::map<hobject_t, Missing::item>::iterator p = missing.find(oid);
- if (p != missing.end() && p->second.need <= v)
- rm(p);
-}
-
-void PG::Missing::rm(const std::map<hobject_t, Missing::item>::iterator &m)
-{
- rmissing.erase(m->second.need.version);
- missing.erase(m);
-}
-
-void PG::Missing::got(const hobject_t& oid, eversion_t v)
-{
- std::map<hobject_t, Missing::item>::iterator p = missing.find(oid);
- assert(p != missing.end());
- assert(p->second.need <= v);
- got(p);
-}
-
-void PG::Missing::got(const std::map<hobject_t, Missing::item>::iterator &m)
-{
- rmissing.erase(m->second.need.version);
- missing.erase(m);
-}
-
ostream& operator<<(ostream& out, const PG& pg)
{
out << "pg[" << pg.info
diff --git a/src/osd/PG.h b/src/osd/PG.h
index eebfa884b08..349100e5e4e 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -319,69 +319,6 @@ public:
WRITE_CLASS_ENCODER(OndiskLog)
- /*
- * Missing - summary of missing objects.
- * kept in memory, as a supplement to Log.
- * also used to pass missing info in messages.
- */
- struct Missing {
- struct item {
- eversion_t need, have;
- item() {}
- item(eversion_t n) : need(n) {} // have no old version
- item(eversion_t n, eversion_t h) : need(n), have(h) {}
- void encode(bufferlist& bl) const {
- ::encode(need, bl);
- ::encode(have, bl);
- }
- void decode(bufferlist::iterator& bl) {
- ::decode(need, bl);
- ::decode(have, bl);
- }
- };
- WRITE_CLASS_ENCODER(item)
-
- map<hobject_t, item> missing; // oid -> (need v, have v)
- map<version_t, hobject_t> rmissing; // v -> oid
-
- unsigned int num_missing() const;
- bool have_missing() const;
- void swap(Missing& o);
- bool is_missing(const hobject_t& oid) const;
- bool is_missing(const hobject_t& oid, eversion_t v) const;
- eversion_t have_old(const hobject_t& oid) const;
- void add_next_event(const pg_log_entry_t& e);
- void revise_need(hobject_t oid, eversion_t need);
- void add(const hobject_t& oid, eversion_t need, eversion_t have);
- void rm(const hobject_t& oid, eversion_t v);
- void rm(const std::map<hobject_t, Missing::item>::iterator &m);
- void got(const hobject_t& oid, eversion_t v);
- void got(const std::map<hobject_t, Missing::item>::iterator &m);
-
- void clear() {
- missing.clear();
- rmissing.clear();
- }
-
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(missing, bl);
- }
-
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(missing, bl);
-
- for (map<hobject_t,item>::iterator it = missing.begin();
- it != missing.end();
- ++it)
- rmissing[it->second.need.version] = it->first;
- }
- };
- WRITE_CLASS_ENCODER(Missing)
-
/*** PG ****/
protected:
@@ -508,7 +445,7 @@ public:
hobject_t log_oid;
hobject_t biginfo_oid;
OndiskLog ondisklog;
- Missing missing;
+ pg_missing_t missing;
map<hobject_t, set<int> > missing_loc;
interval_set<snapid_t> snap_collections;
@@ -1026,8 +963,8 @@ protected:
bool need_up_thru;
set<int> stray_set; // non-acting osds that have PG data.
eversion_t oldest_update; // acting: lowest (valid) last_update in active set
- map<int,pg_info_t> peer_info; // info from peers (stray or prior)
- map<int, Missing> peer_missing;
+ map<int,pg_info_t> peer_info; // info from peers (stray or prior)
+ map<int,pg_missing_t> peer_missing;
set<int> peer_log_requested; // logs i've requested (and start stamps)
set<int> peer_missing_requested;
set<int> stray_purged; // i deleted these strays; ignore racing PGInfo from them
@@ -1177,13 +1114,13 @@ public:
virtual void calc_trim_to() = 0;
void proc_replica_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog,
- Missing& omissing, int from);
+ pg_missing_t& omissing, int from);
void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog,
- Missing& omissing, int from);
+ pg_missing_t& omissing, int from);
bool proc_replica_info(int from, pg_info_t &info);
bool merge_old_entry(ObjectStore::Transaction& t, pg_log_entry_t& oe);
void merge_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, int from);
- bool search_for_missing(const pg_info_t &oinfo, const Missing *omissing,
+ bool search_for_missing(const pg_info_t &oinfo, const pg_missing_t *omissing,
int fromosd);
void check_for_lost_objects();
@@ -1452,27 +1389,9 @@ public:
utime_t expire) = 0;
};
-WRITE_CLASS_ENCODER(PG::Missing::item)
-WRITE_CLASS_ENCODER(PG::Missing)
WRITE_CLASS_ENCODER(PG::Interval)
WRITE_CLASS_ENCODER(PG::OndiskLog)
-inline ostream& operator<<(ostream& out, const PG::Missing::item& i)
-{
- out << i.need;
- if (i.have != eversion_t())
- out << "(" << i.have << ")";
- return out;
-}
-
-inline ostream& operator<<(ostream& out, const PG::Missing& missing)
-{
- out << "missing(" << missing.num_missing();
- //if (missing.num_lost()) out << ", " << missing.num_lost() << " lost";
- out << ")";
- return out;
-}
-
inline ostream& operator<<(ostream& out, const PG::Interval& i)
{
out << "interval(" << i.first << "-" << i.last << " " << i.up << "/" << i.acting;
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index f4396f1056c..d4da6021870 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -103,7 +103,7 @@ void ReplicatedPG::wait_for_missing_object(const hobject_t& soid, Message *m)
assert(is_missing_object(soid));
// we don't have it (yet).
- map<hobject_t, Missing::item>::const_iterator g = missing.missing.find(soid);
+ map<hobject_t, pg_missing_t::item>::const_iterator g = missing.missing.find(soid);
assert(g != missing.missing.end());
const eversion_t &v(g->second.need);
@@ -322,7 +322,7 @@ void ReplicatedPG::do_pg_op(MOSDOp *op)
}
assert(snapid == CEPH_NOSNAP || missing.missing.empty());
- map<hobject_t, Missing::item>::iterator missing_iter =
+ map<hobject_t, pg_missing_t::item>::iterator missing_iter =
missing.missing.lower_bound(current);
vector<hobject_t>::iterator ls_iter = sentries.begin();
while (1) {
@@ -3762,7 +3762,7 @@ void ReplicatedPG::sub_op_modify_reply(MOSDSubOpReply *r)
// ===========================================================
void ReplicatedPG::calc_head_subsets(SnapSet& snapset, const hobject_t& head,
- Missing& missing,
+ pg_missing_t& missing,
const hobject_t &last_backfill,
interval_set<uint64_t>& data_subset,
map<hobject_t, interval_set<uint64_t> >& clone_subsets)
@@ -3804,7 +3804,7 @@ void ReplicatedPG::calc_head_subsets(SnapSet& snapset, const hobject_t& head,
}
void ReplicatedPG::calc_clone_subsets(SnapSet& snapset, const hobject_t& soid,
- Missing& missing,
+ pg_missing_t& missing,
const hobject_t &last_backfill,
interval_set<uint64_t>& data_subset,
map<hobject_t, interval_set<uint64_t> >& clone_subsets)
@@ -4874,8 +4874,8 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
utime_t mtime = ceph_clock_now(g_ceph_context);
info.last_update.epoch = get_osdmap()->get_epoch();
- map<hobject_t, Missing::item>::iterator m = missing.missing.begin();
- map<hobject_t, Missing::item>::iterator mend = missing.missing.end();
+ map<hobject_t, pg_missing_t::item>::iterator m = missing.missing.begin();
+ map<hobject_t, pg_missing_t::item>::iterator mend = missing.missing.end();
while (m != mend) {
const hobject_t &oid(m->first);
if (missing_loc.find(oid) != missing_loc.end()) {
@@ -5196,7 +5196,7 @@ int ReplicatedPG::recover_primary(int max)
latest = 0;
soid = p->second;
}
- Missing::item& item = missing.missing[p->second];
+ pg_missing_t::item& item = missing.missing[p->second];
p++;
hobject_t head = soid;
@@ -5310,7 +5310,7 @@ int ReplicatedPG::recover_primary(int max)
dout(10) << " need to pull prior_version " << need << " for revert " << item << dendl;
set<int>& loc = missing_loc[soid];
- for (map<int,Missing>::iterator p = peer_missing.begin(); p != peer_missing.end(); ++p)
+ for (map<int,pg_missing_t>::iterator p = peer_missing.begin(); p != peer_missing.end(); ++p)
if (p->second.missing[soid].have == need)
loc.insert(p->first);
dout(10) << " will pull " << need << " from one of " << loc << dendl;
@@ -5408,7 +5408,7 @@ int ReplicatedPG::recover_replicas(int max)
// this is FAR from an optimal recovery order. pretty lame, really.
for (unsigned i=1; i<acting.size(); i++) {
int peer = acting[i];
- map<int, Missing>::const_iterator pm = peer_missing.find(peer);
+ map<int, pg_missing_t>::const_iterator pm = peer_missing.find(peer);
assert(pm != peer_missing.end());
size_t m_sz = pm->second.num_missing();
@@ -5416,7 +5416,7 @@ int ReplicatedPG::recover_replicas(int max)
dout(20) << " peer osd." << peer << " missing " << pm->second.missing << dendl;
// oldest first!
- const Missing &m(pm->second);
+ const pg_missing_t &m(pm->second);
for (map<version_t, hobject_t>::const_iterator p = m.rmissing.begin();
p != m.rmissing.end() && started < max;
++p) {
@@ -5436,7 +5436,7 @@ int ReplicatedPG::recover_replicas(int max)
}
dout(10) << __func__ << ": recover_object_replicas(" << soid << ")" << dendl;
- map<hobject_t,Missing::item>::const_iterator p = m.missing.find(soid);
+ map<hobject_t,pg_missing_t::item>::const_iterator p = m.missing.find(soid);
started += recover_object_replicas(soid, p->second.need);
}
}
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index ef09fa27d1e..93634e4eafa 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -588,11 +588,11 @@ protected:
int recover_object_replicas(const hobject_t& soid, eversion_t v);
void calc_head_subsets(SnapSet& snapset, const hobject_t& head,
- Missing& missing,
+ pg_missing_t& missing,
const hobject_t &last_backfill,
interval_set<uint64_t>& data_subset,
map<hobject_t, interval_set<uint64_t> >& clone_subsets);
- void calc_clone_subsets(SnapSet& snapset, const hobject_t& poid, Missing& missing,
+ void calc_clone_subsets(SnapSet& snapset, const hobject_t& poid, pg_missing_t& missing,
const hobject_t &last_backfill,
interval_set<uint64_t>& data_subset,
map<hobject_t, interval_set<uint64_t> >& clone_subsets);
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 3933292cd9c..1b7c4fa9153 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -1286,6 +1286,178 @@ ostream& pg_log_t::print(ostream& out) const
}
+// -- pg_missing_t --
+
+/**
+ * Serialize the missing set into bl.
+ *
+ * Writes a one-byte struct version (currently 1) followed by the
+ * oid -> item map.  The reverse index (rmissing) is not written.
+ */
+void pg_missing_t::encode(bufferlist &bl) const
+{
+  __u8 version = 1;
+  ::encode(version, bl);
+  ::encode(missing, bl);
+}
+
+/**
+ * Deserialize a missing set from bl.
+ *
+ * Reads the one-byte struct version and the oid -> item map, then rebuilds
+ * the reverse index from the decoded map.  rmissing is cleared first so that
+ * decoding into an object that already holds entries cannot leave stale
+ * version -> oid mappings behind.
+ */
+void pg_missing_t::decode(bufferlist::iterator &bl)
+{
+  __u8 struct_v;
+  ::decode(struct_v, bl);
+  ::decode(missing, bl);
+
+  // rmissing is never encoded; regenerate it from scratch.
+  rmissing.clear();
+  for (map<hobject_t,item>::iterator it = missing.begin();
+       it != missing.end();
+       ++it)
+    rmissing[it->second.need.version] = it->first;
+}
+
+/**
+ * Emit the missing set through f as an array section named "missing".
+ *
+ * Each entry becomes an "item" object section holding the object name
+ * (as the "object" stream) plus whatever item::dump() writes.
+ */
+void pg_missing_t::dump(Formatter *f) const
+{
+  typedef map<hobject_t,item>::const_iterator missing_iter;
+
+  f->open_array_section("missing");
+  const missing_iter last = missing.end();
+  missing_iter cur = missing.begin();
+  while (cur != last) {
+    f->open_object_section("item");
+    f->dump_stream("object") << cur->first;
+    cur->second.dump(f);
+    f->close_section();  // "item"
+    ++cur;
+  }
+  f->close_section();  // "missing"
+}
+
+/**
+ * Append sample instances to o: first an empty missing set, then one
+ * holding a single object with need eversion_t(5, 6) and have
+ * eversion_t(5, 1).
+ *
+ * The instances are allocated with new; presumably the caller is expected
+ * to free them (not visible here).
+ */
+void pg_missing_t::generate_test_instances(list<pg_missing_t*>& o)
+{
+  // Instance 1: empty.
+  o.push_back(new pg_missing_t);
+
+  // Instance 2: one missing object.
+  pg_missing_t *populated = new pg_missing_t;
+  populated->add(hobject_t(object_t("foo"), "foo", 123, 456),
+                 eversion_t(5, 6), eversion_t(5, 1));
+  o.push_back(populated);
+}
+
+/**
+ * Print an item as its needed version, followed by the held version in
+ * parentheses when that version is not the default-constructed eversion_t.
+ */
+ostream& operator<<(ostream& out, const pg_missing_t::item& i)
+{
+  const bool has_old_version = (i.have != eversion_t());
+  out << i.need;
+  if (has_old_version) {
+    out << "(" << i.have << ")";
+  }
+  return out;
+}
+
+/**
+ * Print a one-line summary of the form "missing(N)", where N is the
+ * number of missing objects.
+ */
+ostream& operator<<(ostream& out, const pg_missing_t& missing)
+{
+  return out << "missing(" << missing.num_missing() << ")";
+}
+
+
+/** Number of objects currently recorded as missing. */
+unsigned int pg_missing_t::num_missing() const
+{
+  return static_cast<unsigned int>(missing.size());
+}
+
+/** True when at least one object is recorded as missing. */
+bool pg_missing_t::have_missing() const
+{
+  if (missing.empty())
+    return false;
+  return true;
+}
+
+/**
+ * Exchange contents with o: both the oid -> item map and the reverse
+ * version -> oid index are swapped.
+ */
+void pg_missing_t::swap(pg_missing_t& o)
+{
+  rmissing.swap(o.rmissing);
+  missing.swap(o.missing);
+}
+
+/** True when oid has an entry in the missing map, whatever its version. */
+bool pg_missing_t::is_missing(const hobject_t& oid) const
+{
+  return missing.count(oid) > 0;
+}
+
+/**
+ * True when oid has an entry in the missing map and the version it needs
+ * is not greater than v.
+ */
+bool pg_missing_t::is_missing(const hobject_t& oid, eversion_t v) const
+{
+  const map<hobject_t, item>::const_iterator entry = missing.find(oid);
+  return entry != missing.end() && !(entry->second.need > v);
+}
+
+/**
+ * Return the version recorded as held ("have") for oid, or a
+ * default-constructed eversion_t when oid is not in the missing map.
+ */
+eversion_t pg_missing_t::have_old(const hobject_t& oid) const
+{
+  const map<hobject_t, item>::const_iterator entry = missing.find(oid);
+  return (entry == missing.end()) ? eversion_t() : entry->second.have;
+}
+
+/*
+ * Fold one log entry into the missing set.
+ *
+ * this needs to be called in log order as we extend the log.  it
+ * assumes missing is accurate up through the previous log entry.
+ *
+ * Non-update entries remove the object via rm(); update entries make
+ * e.version the needed version and refresh the reverse index.
+ */
+void pg_missing_t::add_next_event(const pg_log_entry_t& e)
+{
+  if (!e.is_update()) {
+    rm(e.soid, e.version);
+    return;
+  }
+
+  map<hobject_t, item>::iterator existing = missing.find(e.soid);
+  const bool already_missing = (existing != missing.end());
+
+  if (e.prior_version == eversion_t() || e.is_clone()) {
+    // new object.  it might already be missing as a divergent item, in
+    // which case the old reverse-index slot has to go first.
+    if (already_missing)
+      rmissing.erase(existing->second.need.version);
+    missing[e.soid] = item(e.version, eversion_t());  // .have = nil
+  } else if (already_missing) {
+    // already missing (prior): bump need, leave .have unchanged.
+    rmissing.erase(existing->second.need.version);
+    existing->second.need = e.version;
+  } else if (e.is_backlog()) {
+    // May not have prior version
+    assert(0 == "these don't exist anymore");
+  } else {
+    // not missing, we must have prior_version (if any)
+    missing[e.soid] = item(e.version, e.prior_version);
+  }
+  rmissing[e.version.version] = e.soid;
+}
+
+/**
+ * Set the needed version of oid to need.
+ *
+ * An existing entry keeps its .have and gets its old reverse-index slot
+ * removed; a new entry is created with a default-constructed .have.
+ */
+void pg_missing_t::revise_need(hobject_t oid, eversion_t need)
+{
+  map<hobject_t, item>::iterator entry = missing.find(oid);
+  if (entry == missing.end()) {
+    missing[oid] = item(need, eversion_t());
+  } else {
+    rmissing.erase(entry->second.need.version);
+    entry->second.need = need;  // do not adjust .have
+  }
+  rmissing[need.version] = oid;
+}
+
+/**
+ * Record oid as missing: it needs version need and currently holds have.
+ *
+ * If oid is already present, the reverse-index slot keyed by its previous
+ * need is removed before the entry is replaced (the same thing
+ * revise_need() and add_next_event() do), so rmissing does not keep a
+ * stale version -> oid mapping.
+ */
+void pg_missing_t::add(const hobject_t& oid, eversion_t need, eversion_t have)
+{
+  map<hobject_t, item>::iterator p = missing.find(oid);
+  if (p != missing.end()) {
+    // Replacing an existing entry: drop the slot indexed by the old need.
+    rmissing.erase(p->second.need.version);
+    p->second = item(need, have);
+  } else {
+    missing[oid] = item(need, have);
+  }
+  rmissing[need.version] = oid;
+}
+
+/**
+ * Remove oid from the missing set, but only when it is present and the
+ * version it needs is <= v.  Otherwise nothing changes.
+ */
+void pg_missing_t::rm(const hobject_t& oid, eversion_t v)
+{
+  std::map<hobject_t, pg_missing_t::item>::iterator entry = missing.find(oid);
+  if (entry == missing.end())
+    return;  // not missing: nothing to do
+  if (entry->second.need <= v)
+    rm(entry);
+}
+
+/**
+ * Remove the entry m points at, together with the reverse-index slot
+ * keyed by its needed version.  m must refer to an element of missing.
+ */
+void pg_missing_t::rm(const std::map<hobject_t, pg_missing_t::item>::iterator &m)
+{
+  // Read the key before the element is erased.
+  const version_t needed = m->second.need.version;
+  rmissing.erase(needed);
+  missing.erase(m);
+}
+
+/**
+ * Mark oid as obtained at version v and drop it from the missing set.
+ *
+ * Unlike rm(), this asserts that oid is present and that the version it
+ * needs is <= v.
+ */
+void pg_missing_t::got(const hobject_t& oid, eversion_t v)
+{
+  typedef std::map<hobject_t, pg_missing_t::item>::iterator missing_iter;
+
+  missing_iter p = missing.find(oid);
+  assert(p != missing.end());
+  assert(p->second.need <= v);
+  got(p);
+}
+
+/**
+ * Drop the entry m points at from both the missing map and the reverse
+ * index.  The removal is exactly what rm(iterator) performs, so it is
+ * delegated there.
+ */
+void pg_missing_t::got(const std::map<hobject_t, pg_missing_t::item>::iterator &m)
+{
+  rm(m);
+}
+
// -- OSDSuperblock --
void OSDSuperblock::encode(bufferlist &bl) const
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index d116f8585ac..f178f680f78 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -44,6 +44,7 @@ struct osd_reqid_t {
entity_name_t name; // who
tid_t tid;
int32_t inc; // incarnation
+
osd_reqid_t()
: tid(0), inc(0) {}
osd_reqid_t(const entity_name_t& a, int i, tid_t t)
@@ -843,7 +844,12 @@ struct pool_stat_t {
WRITE_CLASS_ENCODER(pool_stat_t)
-
+/**
+ * pg_history_t - information about recent pg peering/mapping history
+ *
+ * This is aggressively shared between OSDs to bound the amount of past
+ * history they need to worry about.
+ */
struct pg_history_t {
epoch_t epoch_created; // epoch in which PG was created
epoch_t last_epoch_started; // lower bound on last epoch started (anywhere, not necessarily locally)
@@ -1019,6 +1025,10 @@ inline ostream& operator<<(ostream& out, const pg_query_t& q) {
}
+/**
+ * pg_log_entry_t - single entry/event in pg log
+ *
+ */
struct pg_log_entry_t {
enum {
MODIFY = 1,
@@ -1198,7 +1208,75 @@ inline ostream& operator<<(ostream& out, const pg_log_t& log)
}
+/**
+ * pg_missing_t - summary of missing objects.
+ *
+ * kept in memory, as a supplement to pg_log_t
+ * also used to pass missing info in messages.
+ *
+ * Two maps are maintained together: missing (oid -> item) and rmissing
+ * (item.need.version -> oid).  Only missing is encoded; decode() rebuilds
+ * rmissing from it.
+ */
+struct pg_missing_t {
+ struct item {
+ eversion_t need, have; // need: version required; have: version held (default-constructed if none)
+ item() {}
+ item(eversion_t n) : need(n) {} // no old version: have stays default-constructed
+ item(eversion_t n, eversion_t h) : need(n), have(h) {}
+ void encode(bufferlist& bl) const { // encodes need, then have
+ ::encode(need, bl);
+ ::encode(have, bl);
+ }
+ void decode(bufferlist::iterator& bl) { // same order as encode(): need, then have
+ ::decode(need, bl);
+ ::decode(have, bl);
+ }
+ void dump(Formatter *f) const { // emits "need" and "have" as streams
+ f->dump_stream("need") << need;
+ f->dump_stream("have") << have;
+ }
+ static void generate_test_instances(list<item*>& o) { // one default item, one with need (1,2) / have (1,1)
+ o.push_back(new item);
+ o.push_back(new item);
+ o.back()->need = eversion_t(1, 2);
+ o.back()->have = eversion_t(1, 1);
+ }
+ };
+ WRITE_CLASS_ENCODER(item)
+
+ map<hobject_t, item> missing; // oid -> (need v, have v)
+ map<version_t, hobject_t> rmissing; // need.version -> oid; not encoded, rebuilt in decode()
+
+ unsigned int num_missing() const; // number of entries in missing
+ bool have_missing() const; // true when missing is non-empty
+ void swap(pg_missing_t& o); // exchanges both maps with o
+ bool is_missing(const hobject_t& oid) const; // oid present, any version
+ bool is_missing(const hobject_t& oid, eversion_t v) const; // oid present and its need is not greater than v
+ eversion_t have_old(const hobject_t& oid) const; // item.have for oid, or eversion_t() if oid is absent
+ void add_next_event(const pg_log_entry_t& e); // must be called in log order
+ void revise_need(hobject_t oid, eversion_t need); // sets need; keeps have of an existing entry
+ void add(const hobject_t& oid, eversion_t need, eversion_t have); // sets missing[oid] and rmissing[need.version]
+ void rm(const hobject_t& oid, eversion_t v); // removes oid only if present with need <= v
+ void rm(const std::map<hobject_t, pg_missing_t::item>::iterator &m); // removes *m and its rmissing slot
+ void got(const hobject_t& oid, eversion_t v); // asserts oid present with need <= v, then removes it
+ void got(const std::map<hobject_t, pg_missing_t::item>::iterator &m); // removes *m and its rmissing slot
+
+ void clear() { // empties both maps
+ missing.clear();
+ rmissing.clear();
+ }
+
+ void encode(bufferlist &bl) const; // writes struct version byte + missing map
+ void decode(bufferlist::iterator &bl); // reads them back and rebuilds rmissing
+ void dump(Formatter *f) const; // "missing" array of "item" objects
+ static void generate_test_instances(list<pg_missing_t*>& o); // one empty set, one with a single entry
+};
+WRITE_CLASS_ENCODER(pg_missing_t::item)
+WRITE_CLASS_ENCODER(pg_missing_t)
+
+ostream& operator<<(ostream& out, const pg_missing_t::item& i);
+ostream& operator<<(ostream& out, const pg_missing_t& missing);
+
+
+// -----------------------------------------
struct osd_peer_stat_t {
struct ceph_timespec stamp;
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index 6744a13a511..0445429bba9 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -8,10 +8,6 @@ TYPE(osd_info_t)
TYPE(OSDMap)
#include "osd/PG.h"
-//TYPE(PG::Missing::item)
-//TYPE(PG::Missing)
-//TYPE(PG::Log::Entry)
-//TYPE(PG::Log)
TYPE(PG::Interval)
TYPE(PG::OndiskLog)
@@ -32,6 +28,8 @@ TYPE(pg_info_t)
TYPE(pg_query_t)
TYPE(pg_log_entry_t)
TYPE(pg_log_t)
+TYPE(pg_missing_t::item)
+TYPE(pg_missing_t)
TYPE(watch_info_t)
TYPE(object_info_t)