osd: report pg stats to mon at least every N (=500) epochs

The mon needs a moderately accurate last_epoch_clean value in order to trim old osdmaps. To prevent a PG that hasn't peered or received IO in forever from preventing this, send pg stats at some minimum frequency. This will increase the pg stat report workload for the mon over an idle pool, but should be no worse than a cluster that is getting actual IO and sees these updates from normal stat updates. This makes the reported update a bit more aggressive/useful in that the epoch is the last map epoch processed by this PG and not just one that is >= the current interval. Note that the semantics of this field are pretty useless at this point. See #5519 Signed-off-by: Sage Weil <sage@inktank.com> (cherry picked from commit da81228cc73c95737f26c630e5c3eccf6ae1aaec)
author: Sage Weil <sage@inktank.com> 2013-07-08 13:27:58 -0700
committer: Sage Weil <sage@inktank.com> 2013-07-12 15:44:44 -0700
commit: da725852190245d2f91b7b21e72baee70e4342bd (patch)
tree: 5d3c33ccaa30aa92e41e3209a09d4709deaf878a
parent: 757af91b2af0da6bbfeeb53551fa1ef4ef9118ea (diff)
download: ceph-da725852190245d2f91b7b21e72baee70e4342bd.tar.gz
2 files changed, 10 insertions, 1 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index d040c9b0d9b..c31282c1997 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -410,6 +410,7 @@ OPTION(osd_heartbeat_grace, OPT_INT, 20)         // (seconds) how long before we
 OPTION(osd_mon_heartbeat_interval, OPT_INT, 30)  // (seconds) how often to ping monitor if no peers
 OPTION(osd_mon_report_interval_max, OPT_INT, 120)
 OPTION(osd_mon_report_interval_min, OPT_INT, 5)  // pg stats, failures, up_thru, boot.
+OPTION(osd_pg_stat_report_interval_max, OPT_INT, 500)  // report pg stats for any given pg at least this often
 OPTION(osd_mon_ack_timeout, OPT_INT, 30) // time out a mon if it doesn't ack stats
 OPTION(osd_min_down_reporters, OPT_INT, 1)   // number of OSDs who need to report a down OSD for it to count
 OPTION(osd_min_down_reports, OPT_INT, 3)     // number of times a down OSD must be reported for it to count
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 5e477ac31fe..fb8b26ff389 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -2303,7 +2303,7 @@ void PG::publish_stats_to_osd()
   pg_stats_publish_lock.Lock();
   if (is_primary()) {
     // update our stat summary
-    info.stats.reported.inc(info.history.same_primary_since);
+    info.stats.reported.inc(get_osdmap()->get_epoch());
     info.stats.version = info.last_update;
     info.stats.created = info.history.epoch_created;
     info.stats.last_scrub = info.history.last_scrub;
@@ -6665,6 +6665,14 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap)
       pg->state_set(PG_STATE_DEGRADED);
     pg->publish_stats_to_osd(); // degraded may have changed
   }
+
+  // if we haven't reported our PG stats in a long time, do so now.
+  if (pg->info.stats.reported.epoch + g_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) {
+    dout(20) << "reporting stats to osd after " << (advmap.osdmap->get_epoch() - pg->info.stats.reported.epoch)
+	     << " epochs" << dendl;
+    pg->publish_stats_to_osd();
+  }
+
   return forward_event();
 }
author	Sage Weil <sage@inktank.com>	2013-07-08 13:27:58 -0700
committer	Sage Weil <sage@inktank.com>	2013-07-12 15:44:44 -0700
commit	da725852190245d2f91b7b21e72baee70e4342bd (patch)
tree	5d3c33ccaa30aa92e41e3209a09d4709deaf878a
parent	757af91b2af0da6bbfeeb53551fa1ef4ef9118ea (diff)
download	ceph-da725852190245d2f91b7b21e72baee70e4342bd.tar.gz