summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGreg Farnum <greg@inktank.com>2013-09-23 15:41:14 -0700
committerGreg Farnum <greg@inktank.com>2013-09-23 15:41:14 -0700
commit7741de0ba43dc47779736465bffa2c9bbcf8fa81 (patch)
treeaaac43e4b74d66a432c5496aee65ae29fea119f4
parent73289b34b0be5b6612e38944794d59b5e789f841 (diff)
parent1425119b9a6b588d3d704beabfa59476f55a3353 (diff)
downloadceph-7741de0ba43dc47779736465bffa2c9bbcf8fa81.tar.gz
Merge branch 'master' of github.com:ceph/ceph
-rw-r--r--src/common/config_opts.h2
-rw-r--r--src/mon/PGMonitor.cc46
-rwxr-xr-xsrc/vstart.sh1
3 files changed, 49 insertions, 0 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index f6283239660..1f616e2c970 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -158,6 +158,8 @@ OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock
OPTION(mon_accept_timeout, OPT_FLOAT, 10.0) // on leader, if paxos update isn't accepted
OPTION(mon_pg_create_interval, OPT_FLOAT, 30.0) // no more than every 30s
OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
+OPTION(mon_pg_warn_min_per_osd, OPT_INT, 20) // min # pgs per (in) osd before we warn the admin
+OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew from average in objects per pg
OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full"
OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full
OPTION(mon_globalid_prealloc, OPT_INT, 100) // how many globalids to prealloc
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 2a677be61d9..76cfde7de0e 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -1847,6 +1847,52 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
detail->push_back(make_pair(HEALTH_ERR, ss.str()));
}
}
+
+ // pg skew
+ int num_in = mon->osdmon()->osdmap.get_num_in_osds();
+ if (num_in && g_conf->mon_pg_warn_min_per_osd > 0) {
+ int per = pg_map.pg_stat.size() / num_in;
+ if (per < g_conf->mon_pg_warn_min_per_osd) {
+ ostringstream ss;
+ ss << "too few pgs per osd (" << per << " < min " << g_conf->mon_pg_warn_min_per_osd << ")";
+ summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ if (detail)
+ detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
+ }
+ if (!pg_map.pg_stat.empty()) {
+ for (hash_map<int,pool_stat_t>::const_iterator p = pg_map.pg_pool_sum.begin();
+ p != pg_map.pg_pool_sum.end();
+ ++p) {
+ const pg_pool_t *pi = mon->osdmon()->osdmap.get_pg_pool(p->first);
+ if (pi->get_pg_num() > pi->get_pgp_num()) {
+ ostringstream ss;
+ ss << "pool " << mon->osdmon()->osdmap.get_pool_name(p->first) << " pg_num "
+ << pi->get_pg_num() << " > pgp_num " << pi->get_pgp_num();
+ summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ if (detail)
+ detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
+ int average_objects_per_pg = pg_map.pg_sum.stats.sum.num_objects / pg_map.pg_stat.size();
+ if (average_objects_per_pg > 0) {
+ int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num();
+ float ratio = (float)objects_per_pg / (float)average_objects_per_pg;
+ if (g_conf->mon_pg_warn_max_object_skew > 0 &&
+ ratio > g_conf->mon_pg_warn_max_object_skew) {
+ ostringstream ss;
+ ss << "pool " << mon->osdmon()->osdmap.get_pool_name(p->first) << " has too few pgs";
+ summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ if (detail) {
+ ostringstream ss;
+ ss << "pool " << mon->osdmon()->osdmap.get_pool_name(p->first) << " objects per pg ("
+ << objects_per_pg << ") is more than " << ratio << " times cluster average ("
+ << average_objects_per_pg << ")";
+ detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
+ }
+ }
+ }
+ }
}
void PGMonitor::check_full_osd_health(list<pair<health_status_t,string> >& summary,
diff --git a/src/vstart.sh b/src/vstart.sh
index c112bfc9138..4c04ef96410 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -339,6 +339,7 @@ $DAEMONOPTS
$COSDDEBUG
$extra_conf
[mon]
+ mon pg warn min per osd = 10
$DAEMONOPTS
$CMONDEBUG
$extra_conf