summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Just <sam.just@inktank.com>2013-10-04 12:29:26 -0700
committerSamuel Just <sam.just@inktank.com>2013-10-04 13:49:11 -0700
commit091809b8149c7595cbcca439c5b8b75a0c42efe1 (patch)
tree0b607c8e9ca8afe99bc3f3e58bcf797913922bfb
parent399f1d53f7f441992f48aa72139cd628c4ad4f29 (diff)
downloadceph-091809b8149c7595cbcca439c5b8b75a0c42efe1.tar.gz
PGMap,PGMonitor: maintain mapping of osd to recent stat epoch
Also, osd_stat will now be empty for an out osd: when an osd is marked out, rather than removing it from osd_stat, we instead zero out the structure. This patch also makes osd_stat_updates and osd_stat_rm private, which should make it simpler to enforce invariants on these mappings. Each up osd will have a mapping, since out osds are now included as empty stats. Signed-off-by: Samuel Just <sam.just@inktank.com>
-rw-r--r--src/mon/PGMap.cc45
-rw-r--r--src/mon/PGMap.h37
-rw-r--r--src/mon/PGMonitor.cc24
3 files changed, 87 insertions, 19 deletions
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index e9a35c6b8ab..0b40e9264ce 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -30,7 +30,7 @@ void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const
return;
}
- ENCODE_START(6, 5, bl);
+ ENCODE_START(7, 5, bl);
::encode(version, bl);
::encode(pg_stat_updates, bl);
::encode(osd_stat_updates, bl);
@@ -41,6 +41,7 @@ void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const
::encode(nearfull_ratio, bl);
::encode(pg_remove, bl);
::encode(stamp, bl);
+ ::encode(osd_epochs, bl);
ENCODE_FINISH(bl);
}
@@ -89,6 +90,17 @@ void PGMap::Incremental::decode(bufferlist::iterator &bl)
}
if (struct_v >= 6)
::decode(stamp, bl);
+ if (struct_v >= 7) {
+ ::decode(osd_epochs, bl);
+ } else {
+ for (map<int32_t, osd_stat_t>::iterator i = osd_stat_updates.begin();
+ i != osd_stat_updates.end();
+ ++i) {
+ // This isn't accurate, but will cause trimming to behave like
+ // previously.
+ osd_epochs.insert(make_pair(i->first, osdmap_epoch));
+ }
+ }
DECODE_FINISH(bl);
}
@@ -195,8 +207,10 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
}
stat_pg_add(update_pg, update_stat);
}
- for (map<int32_t,osd_stat_t>::const_iterator p = inc.osd_stat_updates.begin();
- p != inc.osd_stat_updates.end();
+ assert(osd_stat.size() == osd_epochs.size());
+ for (map<int32_t,osd_stat_t>::const_iterator p =
+ inc.get_osd_stat_updates().begin();
+ p != inc.get_osd_stat_updates().end();
++p) {
int osd = p->first;
const osd_stat_t &new_stats(p->second);
@@ -209,6 +223,8 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
stat_osd_sub(t->second);
t->second = new_stats;
}
+ assert(inc.get_osd_epochs().find(osd) != inc.get_osd_epochs().end());
+ osd_epochs.insert(*(inc.get_osd_epochs().find(osd)));
stat_osd_add(new_stats);
@@ -226,8 +242,8 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
}
}
- for (set<int>::iterator p = inc.osd_stat_rm.begin();
- p != inc.osd_stat_rm.end();
+ for (set<int>::iterator p = inc.get_osd_stat_rm().begin();
+ p != inc.get_osd_stat_rm().end();
++p) {
hash_map<int32_t,osd_stat_t>::iterator t = osd_stat.find(*p);
if (t != osd_stat.end()) {
@@ -434,7 +450,7 @@ void PGMap::encode(bufferlist &bl, uint64_t features) const
return;
}
- ENCODE_START(5, 4, bl);
+ ENCODE_START(6, 4, bl);
::encode(version, bl);
::encode(pg_stat, bl);
::encode(osd_stat, bl);
@@ -443,6 +459,7 @@ void PGMap::encode(bufferlist &bl, uint64_t features) const
::encode(full_ratio, bl);
::encode(nearfull_ratio, bl);
::encode(stamp, bl);
+ ::encode(osd_epochs, bl);
ENCODE_FINISH(bl);
}
@@ -472,6 +489,17 @@ void PGMap::decode(bufferlist::iterator &bl)
}
if (struct_v >= 5)
::decode(stamp, bl);
+ if (struct_v >= 6) {
+ ::decode(osd_epochs, bl);
+ } else {
+ for (hash_map<int32_t, osd_stat_t>::iterator i = osd_stat.begin();
+ i != osd_stat.end();
+ ++i) {
+ // This isn't accurate, but will cause trimming to behave like
+ // previously.
+ osd_epochs.insert(make_pair(i->first, last_osdmap_epoch));
+ }
+ }
DECODE_FINISH(bl);
calc_stats();
@@ -488,7 +516,10 @@ void PGMap::dirty_all(Incremental& inc)
inc.pg_stat_updates[p->first] = p->second;
}
for (hash_map<int32_t, osd_stat_t>::const_iterator p = osd_stat.begin(); p != osd_stat.end(); ++p) {
- inc.osd_stat_updates[p->first] = p->second;
+ assert(inc.get_osd_epochs().count(p->first));
+ inc.update_stat(p->first,
+ inc.get_osd_epochs().find(p->first)->second,
+ p->second);
}
}
diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h
index 84d89f87517..7a202fc0006 100644
--- a/src/mon/PGMap.h
+++ b/src/mon/PGMap.h
@@ -43,12 +43,13 @@ public:
float full_ratio;
float nearfull_ratio;
+ // mapping of osd to most recently reported osdmap epoch
+ hash_map<int32_t,epoch_t> osd_epochs;
+
class Incremental {
public:
version_t version;
map<pg_t,pg_stat_t> pg_stat_updates;
- map<int32_t,osd_stat_t> osd_stat_updates;
- set<int32_t> osd_stat_rm;
epoch_t osdmap_epoch;
epoch_t pg_scan; // osdmap epoch
set<pg_t> pg_remove;
@@ -56,6 +57,38 @@ public:
float nearfull_ratio;
utime_t stamp;
+ private:
+ map<int32_t,osd_stat_t> osd_stat_updates;
+ set<int32_t> osd_stat_rm;
+
+ // mapping of osd to most recently reported osdmap epoch
+ map<int32_t,epoch_t> osd_epochs;
+ public:
+
+ const map<int32_t, osd_stat_t> &get_osd_stat_updates() const {
+ return osd_stat_updates;
+ }
+ const set<int32_t> &get_osd_stat_rm() const {
+ return osd_stat_rm;
+ }
+ const map<int32_t, epoch_t> &get_osd_epochs() const {
+ return osd_epochs;
+ }
+
+ void update_stat(int32_t osd, epoch_t epoch, const osd_stat_t &stat) {
+ osd_stat_updates[osd] = stat;
+ osd_epochs[osd] = epoch;
+ assert(osd_epochs.size() == osd_stat_updates.size());
+ }
+ void stat_osd_out(int32_t osd) {
+ // 0 the stats for the osd
+ osd_stat_updates[osd] = osd_stat_t();
+ }
+ void rm_stat(int32_t osd) {
+ osd_stat_rm.insert(osd);
+ osd_epochs.erase(osd);
+ osd_stat_updates.erase(osd);
+ }
void encode(bufferlist &bl, uint64_t features=-1) const;
void decode(bufferlist::iterator &bl);
void dump(Formatter *f) const;
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 0f495052747..0644922ddb4 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -494,15 +494,19 @@ void PGMonitor::encode_pending(MonitorDBStore::Transaction *t)
{
bufferlist dirty;
string prefix = pgmap_osd_prefix;
- for (map<int32_t,osd_stat_t>::const_iterator p = pending_inc.osd_stat_updates.begin();
- p != pending_inc.osd_stat_updates.end();
+ for (map<int32_t,osd_stat_t>::const_iterator p =
+ pending_inc.get_osd_stat_updates().begin();
+ p != pending_inc.get_osd_stat_updates().end();
++p) {
::encode(p->first, dirty);
bufferlist bl;
::encode(p->second, bl, features);
t->put(prefix, stringify(p->first), bl);
}
- for (set<int32_t>::const_iterator p = pending_inc.osd_stat_rm.begin(); p != pending_inc.osd_stat_rm.end(); ++p) {
+ for (set<int32_t>::const_iterator p =
+ pending_inc.get_osd_stat_rm().begin();
+ p != pending_inc.get_osd_stat_rm().end();
+ ++p) {
::encode(*p, dirty);
t->erase(prefix, stringify(*p));
}
@@ -725,7 +729,11 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats)
}
// osd stat
- pending_inc.osd_stat_updates[from] = stats->osd_stat;
+ if (mon->osdmon()->osdmap.is_in(from)) {
+ pending_inc.update_stat(from, stats->epoch, stats->osd_stat);
+ } else {
+ pending_inc.update_stat(from, stats->epoch, osd_stat_t());
+ }
if (pg_map.osd_stat.count(from))
dout(10) << " got osd." << from << " " << stats->osd_stat << " (was " << pg_map.osd_stat[from] << ")" << dendl;
@@ -842,11 +850,7 @@ void PGMonitor::check_osd_map(epoch_t epoch)
++p)
if (p->second == CEPH_OSD_OUT) {
dout(10) << "check_osd_map osd." << p->first << " went OUT" << dendl;
- pending_inc.osd_stat_rm.insert(p->first);
- } else {
- dout(10) << "check_osd_map osd." << p->first << " is IN" << dendl;
- pending_inc.osd_stat_rm.erase(p->first);
- pending_inc.osd_stat_updates[p->first];
+ pending_inc.stat_osd_out(p->first);
}
// this is conservative: we want to know if any osds (maybe) got marked down.
@@ -867,7 +871,7 @@ void PGMonitor::check_osd_map(epoch_t epoch)
// whether it was created *or* destroyed, we can safely drop
// it's osd_stat_t record.
dout(10) << "check_osd_map osd." << p->first << " created or destroyed" << dendl;
- pending_inc.osd_stat_rm.insert(p->first);
+ pending_inc.rm_stat(p->first);
// and adjust full, nearfull set
pg_map.nearfull_osds.erase(p->first);