author     Sage Weil <sage@inktank.com>  2013-10-16 14:13:03 -0700
committer  Sage Weil <sage@inktank.com>  2013-10-16 14:13:03 -0700
commit     5c476277a61902a002401d1655789a87037bc5ac
tree       bccc17e80dbb0686f84a715ea0086613ac37e694
parent     a6a3f50dceb92dd7c5eb0f6e956e491ed16e94e6
parent     14e91bf1246eed944adf8bbad627c030654da142

Merge remote-tracking branch 'gh/next'

-rw-r--r--   ceph.spec.in                       |   2
-rw-r--r--   debian/ceph-test.install           |   2
-rw-r--r--   src/common/config_opts.h           |   2
-rw-r--r--   src/mon/MonCommands.h              |   6
-rw-r--r--   src/mon/OSDMonitor.cc              |  99
-rw-r--r--   src/mon/PGMap.cc                   | 299
-rw-r--r--   src/mon/PGMap.h                    |  73
-rw-r--r--   src/mon/PGMonitor.cc               |  39
-rw-r--r--   src/os/LevelDBStore.h              |   4
-rw-r--r--   src/rgw/rgw_acl.cc                 |   2
-rw-r--r--   src/rgw/rgw_main.cc                |   2
-rw-r--r--   src/rgw/rgw_rados.cc               |  82
-rw-r--r--   src/rgw/rgw_rados.h                |   4
-rw-r--r--   src/test/Makefile.am               |   5
-rwxr-xr-x   src/test/filestore/run_seed_to.sh  |   9
-rw-r--r--   src/test/osd/TestRados.cc          |  13
-rw-r--r--   src/tools/Makefile.am              |   6
-rw-r--r--   src/tools/ceph-kvstore-tool.cc (renamed from src/test/ObjectMap/test_store_tool/test_store_tool.cc) | 128

18 files changed, 642 insertions(+), 135 deletions(-)
diff --git a/ceph.spec.in b/ceph.spec.in
index bcb1214cc93..ee4b84ef090 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -665,12 +665,12 @@ fi
%{_bindir}/ceph_test_rados_watch_notify
%{_bindir}/ceph_test_signal_handlers
%{_bindir}/ceph_test_snap_mapper
-%{_bindir}/ceph_test_store_tool
%{_bindir}/ceph_test_timers
%{_bindir}/ceph_tpbench
%{_bindir}/ceph_xattr_bench
%{_bindir}/ceph-monstore-tool
%{_bindir}/ceph-osdomap-tool
+%{_bindir}/ceph-kvstore-tool
%files -n libcephfs_jni1
%defattr(-,root,root,-)
diff --git a/debian/ceph-test.install b/debian/ceph-test.install
index c5a5e0a9774..237a05850be 100644
--- a/debian/ceph-test.install
+++ b/debian/ceph-test.install
@@ -67,7 +67,6 @@ usr/bin/ceph_test_rados_watch_notify
usr/bin/ceph_test_rewrite_latency
usr/bin/ceph_test_signal_handlers
usr/bin/ceph_test_snap_mapper
-usr/bin/ceph_test_store_tool
usr/bin/ceph_test_stress_watch
usr/bin/ceph_test_timers
usr/bin/ceph_test_trans
@@ -75,4 +74,5 @@ usr/bin/ceph_tpbench
usr/bin/ceph_xattr_bench
usr/bin/ceph-monstore-tool
usr/bin/ceph-osdomap-tool
+usr/bin/ceph-kvstore-tool
usr/share/java/libcephfs-test.jar
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index b419dec88b5..0b3938ecb9e 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -160,6 +160,8 @@ OPTION(mon_pg_create_interval, OPT_FLOAT, 30.0) // no more than every 30s
OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
OPTION(mon_pg_warn_min_per_osd, OPT_INT, 20) // min # pgs per (in) osd before we warn the admin
OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew few average in objects per pg
+OPTION(mon_pg_warn_min_objects, OPT_INT, 10000) // do not warn below this object #
+OPTION(mon_pg_warn_min_pool_objects, OPT_INT, 1000) // do not warn on pools below this object #
OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full"
OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full
OPTION(mon_globalid_prealloc, OPT_INT, 100) // how many globalids to prealloc
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index ae6bffe0d7d..5a6ca6a471d 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -112,7 +112,7 @@ COMMAND("pg send_pg_creates", "trigger pg creates to be issued",\
"pg", "rw", "cli,rest")
COMMAND("pg dump " \
"name=dumpcontents,type=CephChoices,strings=all|summary|sum|delta|pools|osds|pgs|pgs_brief,n=N,req=false", \
- "show human-readable versions of pg map", "pg", "r", "cli,rest")
+ "show human-readable versions of pg map (only 'all' valid with plain)", "pg", "r", "cli,rest")
COMMAND("pg dump_json " \
"name=dumpcontents,type=CephChoices,strings=all|summary|sum|pools|osds|pgs,n=N,req=false", \
"show human-readable version of pg map in json only",\
@@ -518,6 +518,10 @@ COMMAND("osd pool set-quota " \
"name=field,type=CephChoices,strings=max_objects|max_bytes " \
"name=val,type=CephString",
"set object or byte limit on pool", "osd", "rw", "cli,rest")
+COMMAND("osd pool stats " \
+ "name=name,type=CephString,req=false",
+ "obtain stats from all pools, or from specified pool",
+ "osd", "r", "cli,rest")
COMMAND("osd reweight-by-utilization " \
"name=oload,type=CephInt,range=100,req=false", \
"reweight OSDs by utilization [overload-percentage-for-consideration, default 120]", \
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 425375b29e2..83e85847045 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2296,6 +2296,105 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
}
r = 0;
+ } else if (prefix == "osd pool stats") {
+ string pool_name;
+ cmd_getval(g_ceph_context, cmdmap, "name", pool_name);
+
+ PGMap& pg_map = mon->pgmon()->pg_map;
+
+ int64_t poolid = -ENOENT;
+ bool one_pool = false;
+ if (!pool_name.empty()) {
+ poolid = osdmap.lookup_pg_pool_name(pool_name);
+ if (poolid < 0) {
+ assert(poolid == -ENOENT);
+ ss << "unrecognized pool '" << pool_name << "'";
+ r = -ENOENT;
+ goto reply;
+ }
+ one_pool = true;
+ }
+
+ stringstream rs;
+
+ if (f)
+ f->open_array_section("pool_stats");
+ if (osdmap.get_pools().size() == 0) {
+ if (!f)
+ ss << "there are no pools!";
+ goto stats_out;
+ }
+
+ for (map<int64_t,pg_pool_t>::const_iterator it = osdmap.get_pools().begin();
+ it != osdmap.get_pools().end();
+ ++it) {
+
+ if (!one_pool)
+ poolid = it->first;
+
+ pool_name = osdmap.get_pool_name(poolid);
+
+ if (f) {
+ f->open_object_section("pool");
+ f->dump_string("pool_name", pool_name.c_str());
+ f->dump_int("pool_id", poolid);
+ f->open_object_section("recovery");
+ }
+
+ stringstream rss, tss;
+ pg_map.pool_recovery_summary(f.get(), &rss, poolid);
+ if (!f && !rss.str().empty())
+ tss << " " << rss.str() << "\n";
+
+ if (f) {
+ f->close_section();
+ f->open_object_section("recovery_rate");
+ }
+
+ rss.clear();
+ rss.str("");
+
+ pg_map.pool_recovery_rate_summary(f.get(), &rss, poolid);
+ if (!f && !rss.str().empty())
+ tss << " recovery io " << rss.str() << "\n";
+
+ if (f) {
+ f->close_section();
+ f->open_object_section("client_io_rate");
+ }
+
+ rss.clear();
+ rss.str("");
+
+ pg_map.pool_client_io_rate_summary(f.get(), &rss, poolid);
+ if (!f && !rss.str().empty())
+ tss << " client io " << rss.str() << "\n";
+
+ if (f) {
+ f->close_section();
+ f->close_section();
+ } else {
+ rs << "pool " << pool_name << " id " << poolid << "\n";
+ if (!tss.str().empty())
+ rs << tss.str() << "\n";
+ else
+ rs << " nothing is going on\n\n";
+ }
+
+ if (one_pool)
+ break;
+ }
+
+stats_out:
+ if (f) {
+ f->close_section();
+ f->flush(rdata);
+ } else {
+ rdata.append(rs.str());
+ }
+ rdata.append("\n");
+ r = 0;
+
} else if (prefix == "osd crush rule list" ||
prefix == "osd crush rule ls") {
string format;
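
The handler above follows the monitors' usual dual-output convention: when a Formatter is available (f), data is emitted as structured sections; otherwise human-readable text accumulates in stringstreams (rs/tss) and is appended to rdata. A minimal self-contained sketch of that convention, using a hypothetical stand-in for Ceph's Formatter interface:

    #include <iostream>
    #include <sstream>
    #include <string>

    // Hypothetical stand-in for ceph::Formatter; just enough for the sketch.
    struct Formatter {
      void dump_int(const std::string &key, long val) {
        std::cout << "\"" << key << "\": " << val << "\n";
      }
    };

    // Emit through the formatter when present, else append plain text.
    void emit_pool_id(Formatter *f, std::ostream *out, long poolid) {
      if (f)
        f->dump_int("pool_id", poolid);        // structured path
      else
        *out << "pool id " << poolid << "\n";  // plain-text path
    }

    int main() {
      std::ostringstream rs;
      emit_pool_id(nullptr, &rs, 3);  // plain mode, as with 'rs' above
      std::cout << rs.str();
      Formatter f;
      emit_pool_id(&f, nullptr, 3);   // formatter mode
      return 0;
    }
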
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index ea70bbd61c3..39cb30f97c8 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -180,6 +180,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
stamp = inc.stamp;
pool_stat_t pg_sum_old = pg_sum;
+ hash_map<uint64_t, pool_stat_t> pg_pool_sum_old;
bool ratios_changed = false;
if (inc.full_ratio != full_ratio && inc.full_ratio != -1) {
@@ -199,6 +200,9 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
const pg_t &update_pg(p->first);
const pg_stat_t &update_stat(p->second);
+ if (pg_pool_sum_old.count(update_pg.pool()) == 0)
+ pg_pool_sum_old[update_pg.pool()] = pg_pool_sum[update_pg.pool()];
+
hash_map<pg_t,pg_stat_t>::iterator t = pg_stat.find(update_pg);
if (t == pg_stat.end()) {
hash_map<pg_t,pg_stat_t>::value_type v(update_pg, update_stat);
@@ -216,7 +220,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
++p) {
int osd = p->first;
const osd_stat_t &new_stats(p->second);
-
+
hash_map<int32_t,osd_stat_t>::iterator t = osd_stat.find(osd);
if (t == osd_stat.end()) {
hash_map<int32_t,osd_stat_t>::value_type v(osd, new_stats);
@@ -229,7 +233,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
osd_epochs.insert(*(inc.get_osd_epochs().find(osd)));
stat_osd_add(new_stats);
-
+
// adjust [near]full status
register_nearfull_status(osd, new_stats);
}
@@ -243,7 +247,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
pg_stat.erase(s);
}
}
-
+
for (set<int>::iterator p = inc.get_osd_stat_rm().begin();
p != inc.get_osd_stat_rm().end();
++p) {
@@ -270,7 +274,9 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
stamp_delta -= pg_sum_deltas.front().second;
pg_sum_deltas.pop_front();
}
-
+
+ update_pool_deltas(cct, inc.stamp, pg_pool_sum_old);
+
if (inc.osdmap_epoch)
last_osdmap_epoch = inc.osdmap_epoch;
if (inc.pg_scan)
@@ -780,54 +786,59 @@ void PGMap::print_osd_perf_stats(std::ostream *ss) const
(*ss) << tab;
}
-void PGMap::recovery_summary(Formatter *f, ostream *out) const
+void PGMap::recovery_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum) const
{
bool first = true;
- if (pg_sum.stats.sum.num_objects_degraded) {
- double pc = (double)pg_sum.stats.sum.num_objects_degraded / (double)pg_sum.stats.sum.num_object_copies * (double)100.0;
+ if (delta_sum.stats.sum.num_objects_degraded) {
+ double pc = (double)delta_sum.stats.sum.num_objects_degraded /
+ (double)delta_sum.stats.sum.num_object_copies * (double)100.0;
char b[20];
snprintf(b, sizeof(b), "%.3lf", pc);
if (f) {
- f->dump_unsigned("degraded_objects", pg_sum.stats.sum.num_objects_degraded);
- f->dump_unsigned("degraded_total", pg_sum.stats.sum.num_object_copies);
+ f->dump_unsigned("degraded_objects", delta_sum.stats.sum.num_objects_degraded);
+ f->dump_unsigned("degraded_total", delta_sum.stats.sum.num_object_copies);
f->dump_string("degrated_ratio", b);
} else {
- *out << pg_sum.stats.sum.num_objects_degraded
- << "/" << pg_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)";
+ *out << delta_sum.stats.sum.num_objects_degraded
+ << "/" << delta_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)";
}
first = false;
}
- if (pg_sum.stats.sum.num_objects_unfound) {
- double pc = (double)pg_sum.stats.sum.num_objects_unfound / (double)pg_sum.stats.sum.num_objects * (double)100.0;
+ if (delta_sum.stats.sum.num_objects_unfound) {
+ double pc = (double)delta_sum.stats.sum.num_objects_unfound /
+ (double)delta_sum.stats.sum.num_objects * (double)100.0;
char b[20];
snprintf(b, sizeof(b), "%.3lf", pc);
if (f) {
- f->dump_unsigned("unfound_objects", pg_sum.stats.sum.num_objects_unfound);
- f->dump_unsigned("unfound_total", pg_sum.stats.sum.num_objects);
+ f->dump_unsigned("unfound_objects", delta_sum.stats.sum.num_objects_unfound);
+ f->dump_unsigned("unfound_total", delta_sum.stats.sum.num_objects);
f->dump_string("unfound_ratio", b);
} else {
if (!first)
*out << "; ";
- *out << pg_sum.stats.sum.num_objects_unfound
- << "/" << pg_sum.stats.sum.num_objects << " unfound (" << b << "%)";
+ *out << delta_sum.stats.sum.num_objects_unfound
+ << "/" << delta_sum.stats.sum.num_objects << " unfound (" << b << "%)";
}
first = false;
}
}
-void PGMap::recovery_rate_summary(Formatter *f, ostream *out) const
+void PGMap::recovery_rate_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum,
+ utime_t delta_stamp) const
{
// make non-negative; we can get negative values if osds send
// uncommitted stats and then "go backward" or if they are just
// buggy/wrong.
- pool_stat_t pos_delta = pg_sum_delta;
+ pool_stat_t pos_delta = delta_sum;
pos_delta.floor(0);
if (pos_delta.stats.sum.num_objects_recovered ||
pos_delta.stats.sum.num_bytes_recovered ||
pos_delta.stats.sum.num_keys_recovered) {
- int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)stamp_delta;
- int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)stamp_delta;
- int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)stamp_delta;
+ int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)delta_stamp;
+ int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)delta_stamp;
+ int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)delta_stamp;
if (f) {
f->dump_int("recovering_objects_per_sec", objps);
f->dump_int("recovering_bytes_per_sec", bps);
@@ -841,24 +852,194 @@ void PGMap::recovery_rate_summary(Formatter *f, ostream *out) const
}
}
-void PGMap::update_delta(CephContext *cct, utime_t inc_stamp, pool_stat_t& pg_sum_old)
+void PGMap::overall_recovery_rate_summary(Formatter *f, ostream *out) const
+{
+ recovery_rate_summary(f, out, pg_sum_delta, stamp_delta);
+}
+
+void PGMap::overall_recovery_summary(Formatter *f, ostream *out) const
+{
+ recovery_summary(f, out, pg_sum);
+}
+
+void PGMap::pool_recovery_rate_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const
{
+ hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p =
+ per_pool_sum_delta.find(poolid);
+ if (p == per_pool_sum_delta.end())
+ return;
+ hash_map<uint64_t,utime_t>::const_iterator ts =
+ per_pool_sum_deltas_stamps.find(p->first);
+ assert(ts != per_pool_sum_deltas_stamps.end());
+ recovery_rate_summary(f, out, p->second.first, ts->second);
+}
+
+void PGMap::pool_recovery_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const
+{
+ hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p =
+ per_pool_sum_delta.find(poolid);
+ if (p == per_pool_sum_delta.end())
+ return;
+ recovery_summary(f, out, p->second.first);
+}
+
+void PGMap::client_io_rate_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum,
+ utime_t delta_stamp) const
+{
+ pool_stat_t pos_delta = delta_sum;
+ pos_delta.floor(0);
+ if (pos_delta.stats.sum.num_rd ||
+ pos_delta.stats.sum.num_wr) {
+ if (pos_delta.stats.sum.num_rd) {
+ int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)delta_stamp;
+ if (f) {
+ f->dump_int("read_bytes_sec", rd);
+ } else {
+ *out << pretty_si_t(rd) << "B/s rd, ";
+ }
+ }
+ if (pos_delta.stats.sum.num_wr) {
+ int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)delta_stamp;
+ if (f) {
+ f->dump_int("write_bytes_sec", wr);
+ } else {
+ *out << pretty_si_t(wr) << "B/s wr, ";
+ }
+ }
+ int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)delta_stamp;
+ if (f) {
+ f->dump_int("op_per_sec", iops);
+ } else {
+ *out << pretty_si_t(iops) << "op/s";
+ }
+ }
+}
+
+void PGMap::overall_client_io_rate_summary(Formatter *f, ostream *out) const
+{
+ client_io_rate_summary(f, out, pg_sum_delta, stamp_delta);
+}
+
+void PGMap::pool_client_io_rate_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const
+{
+ hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p =
+ per_pool_sum_delta.find(poolid);
+ if (p == per_pool_sum_delta.end())
+ return;
+ hash_map<uint64_t,utime_t>::const_iterator ts =
+ per_pool_sum_deltas_stamps.find(p->first);
+ assert(ts != per_pool_sum_deltas_stamps.end());
+ client_io_rate_summary(f, out, p->second.first, ts->second);
+}
+
+/**
+ * update aggregated delta
+ *
+ * @param cct ceph context
+ * @param ts Timestamp for the stats being delta'ed
+ * @param old_pool_sum Previous stats sum
+ * @param last_ts Last timestamp for pool
+ * @param current_pool_sum Current stats sum
+ * @param result_pool_delta Resulting stats delta
+ * @param result_ts_delta Resulting timestamp delta
+ * @param delta_avg_list List of last N computed deltas, used to average
+ */
+void PGMap::update_delta(CephContext *cct,
+ const utime_t ts,
+ const pool_stat_t& old_pool_sum,
+ utime_t *last_ts,
+ const pool_stat_t& current_pool_sum,
+ pool_stat_t *result_pool_delta,
+ utime_t *result_ts_delta,
+ list<pair<pool_stat_t,utime_t> > *delta_avg_list)
+{
+ /* @p ts is the timestamp we want to associate with the data
+ * in @p old_pool_sum, and on which we will base ourselves to
+ * calculate the delta, stored in 'delta_t'.
+ */
utime_t delta_t;
- delta_t = inc_stamp;
- delta_t -= stamp;
- stamp = inc_stamp;
+ delta_t = ts; // start with the provided timestamp
+ delta_t -= *last_ts; // take the last timestamp we saw
+ *last_ts = ts; // @p ts becomes the last timestamp we saw
// calculate a delta, and average over the last 2 deltas.
- pool_stat_t d = pg_sum;
- d.stats.sub(pg_sum_old.stats);
- pg_sum_deltas.push_back(make_pair(d, delta_t));
- stamp_delta += delta_t;
+ /* start by taking a copy of our current @p result_pool_sum, and by
+ * taking out the stats from @p old_pool_sum. This generates a stats
+ * delta. Stash this stats delta in @p delta_avg_list, along with the
+ * timestamp delta for these results.
+ */
+ pool_stat_t d = current_pool_sum;
+ d.stats.sub(old_pool_sum.stats);
+ delta_avg_list->push_back(make_pair(d,delta_t));
+ *result_ts_delta += delta_t;
+
+ /* Aggregate current delta, and take out the last seen delta (if any) to
+ * average it out.
+ */
+ result_pool_delta->stats.add(d.stats);
+ size_t s = MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1);
+ if (delta_avg_list->size() > s) {
+ result_pool_delta->stats.sub(delta_avg_list->front().first.stats);
+ *result_ts_delta -= delta_avg_list->front().second;
+ delta_avg_list->pop_front();
+ }
+}
- pg_sum_delta.stats.add(d.stats);
- if (pg_sum_deltas.size() > (std::list< pair<pool_stat_t, utime_t> >::size_type)MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1)) {
- pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats);
- stamp_delta -= pg_sum_deltas.front().second;
- pg_sum_deltas.pop_front();
+/**
+ * update aggregated delta
+ *
+ * @param cct ceph context
+ * @param ts Timestamp
+ * @param pg_sum_old Old pg_sum
+ */
+void PGMap::update_global_delta(CephContext *cct,
+ const utime_t ts, const pool_stat_t& pg_sum_old)
+{
+ update_delta(cct, ts, pg_sum_old, &stamp, pg_sum, &pg_sum_delta,
+ &stamp_delta, &pg_sum_deltas);
+}
+
+/**
+ * Update a given pool's deltas
+ *
+ * @param cct Ceph Context
+ * @param ts Timestamp for the stats being delta'ed
+ * @param pool Pool's id
+ * @param old_pool_sum Previous stats sum
+ */
+void PGMap::update_one_pool_delta(CephContext *cct,
+ const utime_t ts,
+ const uint64_t pool,
+ const pool_stat_t& old_pool_sum)
+{
+ if (per_pool_sum_deltas.count(pool) == 0) {
+ assert(per_pool_sum_deltas_stamps.count(pool) == 0);
+ assert(per_pool_sum_delta.count(pool) == 0);
+ }
+
+ pair<pool_stat_t,utime_t>& sum_delta = per_pool_sum_delta[pool];
+
+ update_delta(cct, ts, old_pool_sum, &sum_delta.second, pg_pool_sum[pool],
+ &sum_delta.first, &per_pool_sum_deltas_stamps[pool],
+ &per_pool_sum_deltas[pool]);
+}
+
+/**
+ * Update pools' deltas
+ *
+ * @param cct CephContext
+ * @param ts Timestamp for the stats being delta'ed
+ * @param pg_pool_sum_old Map of pool stats for delta calcs.
+ */
+void PGMap::update_pool_deltas(CephContext *cct, const utime_t ts,
+ const hash_map<uint64_t,pool_stat_t>& pg_pool_sum_old)
+{
+ for (hash_map<uint64_t,pool_stat_t>::const_iterator it = pg_pool_sum_old.begin();
+ it != pg_pool_sum_old.end(); ++it) {
+ update_one_pool_delta(cct, ts, it->first, it->second);
}
}
@@ -911,7 +1092,7 @@ void PGMap::print_summary(Formatter *f, ostream *out) const
}
std::stringstream ssr;
- recovery_summary(f, &ssr);
+ overall_recovery_summary(f, &ssr);
if (!f && ssr.str().length())
*out << " " << ssr.str() << "\n";
ssr.clear();
@@ -920,43 +1101,17 @@ void PGMap::print_summary(Formatter *f, ostream *out) const
if (!f)
*out << ss.str(); // pgs by state
- recovery_rate_summary(f, &ssr);
+ overall_recovery_rate_summary(f, &ssr);
if (!f && ssr.str().length())
*out << "recovery io " << ssr.str() << "\n";
- // make non-negative; we can get negative values if osds send
- // uncommitted stats and then "go backward" or if they are just
- // buggy/wrong.
- pool_stat_t pos_delta = pg_sum_delta;
- pos_delta.floor(0);
- if (pos_delta.stats.sum.num_rd ||
- pos_delta.stats.sum.num_wr) {
- if (!f)
- *out << " client io ";
- if (pos_delta.stats.sum.num_rd) {
- int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta;
- if (f) {
- f->dump_int("read_bytes_sec", rd);
- } else {
- *out << pretty_si_t(rd) << "B/s rd, ";
- }
- }
- if (pos_delta.stats.sum.num_wr) {
- int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta;
- if (f) {
- f->dump_int("write_bytes_sec", wr);
- } else {
- *out << pretty_si_t(wr) << "B/s wr, ";
- }
- }
- int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta;
- if (f) {
- f->dump_int("op_per_sec", iops);
- } else {
- *out << pretty_si_t(iops) << "op/s";
- *out << "\n";
- }
- }
+ ssr.clear();
+ ssr.str("");
+
+ overall_client_io_rate_summary(f, &ssr);
+ if (!f && ssr.str().length())
+ *out << " client io " << ssr.str() << "\n";
+
}
@@ -1002,12 +1157,12 @@ void PGMap::print_oneline_summary(ostream *out) const
}
std::stringstream ssr;
- recovery_summary(NULL, &ssr);
+ overall_recovery_summary(NULL, &ssr);
if (ssr.str().length())
*out << "; " << ssr.str();
ssr.clear();
ssr.str("");
- recovery_rate_summary(NULL, &ssr);
+ overall_recovery_rate_summary(NULL, &ssr);
if (ssr.str().length())
*out << "; " << ssr.str() << " recovering";
}
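
The global and per-pool paths now share one moving-window computation: append the newest (stats delta, time delta) pair, fold it into the running sums, and once the window holds more than mon_stat_smooth_intervals samples, subtract the oldest pair back out. A stand-alone sketch of that smoothing, reduced to a single scalar statistic (names are illustrative):

    #include <deque>
    #include <iostream>
    #include <utility>

    struct DeltaWindow {
      std::deque<std::pair<long, double>> samples;  // (stat delta, time delta)
      long sum_delta = 0;
      double sum_dt = 0.0;
      size_t max_samples;   // plays the role of mon_stat_smooth_intervals

      explicit DeltaWindow(size_t n) : max_samples(n) {}

      void add(long d, double dt) {
        samples.emplace_back(d, dt);
        sum_delta += d;          // aggregate the current delta...
        sum_dt += dt;
        if (samples.size() > max_samples) {
          sum_delta -= samples.front().first;   // ...and take out the oldest
          sum_dt -= samples.front().second;     // one, as pg_sum_deltas does
          samples.pop_front();
        }
      }
      double rate() const { return sum_dt > 0 ? sum_delta / sum_dt : 0.0; }
    };

    int main() {
      DeltaWindow w(2);      // e.g. mon_stat_smooth_intervals = 2
      w.add(100, 5.0);
      w.add(300, 5.0);
      w.add(200, 5.0);       // the first sample drops out of the window
      std::cout << w.rate() << " units/s\n";  // (300 + 200) / 10 = 50
      return 0;
    }
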
diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h
index 7a202fc0006..c8ce7fd973e 100644
--- a/src/mon/PGMap.h
+++ b/src/mon/PGMap.h
@@ -109,13 +109,51 @@ public:
utime_t stamp;
// recent deltas, and summation
+ /**
+ * keep track of last deltas for each pool, calculated using
+ * @p pg_pool_sum as baseline.
+ */
+ hash_map<uint64_t, list< pair<pool_stat_t, utime_t> > > per_pool_sum_deltas;
+ /**
+ * keep track of per-pool timestamp deltas, according to last update on
+ * each pool.
+ */
+ hash_map<uint64_t, utime_t> per_pool_sum_deltas_stamps;
+ /**
+ * keep track of sum deltas, per-pool, taking into account any previous
+ * deltas existing in @p per_pool_sum_deltas. The utime_t as second member
+ * of the pair is the timestamp refering to the last update (i.e., the first
+ * member of the pair) for a given pool.
+ */
+ hash_map<uint64_t, pair<pool_stat_t,utime_t> > per_pool_sum_delta;
+
list< pair<pool_stat_t, utime_t> > pg_sum_deltas;
pool_stat_t pg_sum_delta;
utime_t stamp_delta;
- void update_delta(CephContext *cct, utime_t inc_stamp, pool_stat_t& pg_sum_old);
+ void update_global_delta(CephContext *cct,
+ const utime_t ts, const pool_stat_t& pg_sum_old);
+ void update_pool_deltas(CephContext *cct,
+ const utime_t ts,
+ const hash_map<uint64_t, pool_stat_t>& pg_pool_sum_old);
void clear_delta();
+ private:
+ void update_delta(CephContext *cct,
+ const utime_t ts,
+ const pool_stat_t& old_pool_sum,
+ utime_t *last_ts,
+ const pool_stat_t& current_pool_sum,
+ pool_stat_t *result_pool_delta,
+ utime_t *result_ts_delta,
+ list<pair<pool_stat_t,utime_t> > *delta_avg_list);
+
+ void update_one_pool_delta(CephContext *cct,
+ const utime_t ts,
+ const uint64_t pool,
+ const pool_stat_t& old_pool_sum);
+ public:
+
set<pg_t> creating_pgs; // lru: front = new additions, back = recently pinged
map<int,set<pg_t> > creating_pgs_by_osd;
@@ -205,8 +243,37 @@ public:
void dump_osd_perf_stats(Formatter *f) const;
void print_osd_perf_stats(std::ostream *ss) const;
- void recovery_summary(Formatter *f, ostream *out) const;
- void recovery_rate_summary(Formatter *f, ostream *out) const;
+ void recovery_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum) const;
+ void overall_recovery_summary(Formatter *f, ostream *out) const;
+ void pool_recovery_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const;
+ void recovery_rate_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum,
+ utime_t delta_stamp) const;
+ void overall_recovery_rate_summary(Formatter *f, ostream *out) const;
+ void pool_recovery_rate_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const;
+ /**
+ * Obtain a formatted/plain output for client I/O, source from stats for a
+ * given @p delta_sum pool over a given @p delta_stamp period of time.
+ */
+ void client_io_rate_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum,
+ utime_t delta_stamp) const;
+ /**
+ * Obtain a formatted/plain output for the overall client I/O, which is
+ * calculated resorting to @p pg_sum_delta and @p stamp_delta.
+ */
+ void overall_client_io_rate_summary(Formatter *f, ostream *out) const;
+ /**
+ * Obtain a formatted/plain output for client I/O over a given pool
+ * with id @p pool_id. We will then obtain pool-specific data
+ * from @p per_pool_sum_delta.
+ */
+ void pool_client_io_rate_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const;
+
void print_summary(Formatter *f, ostream *out) const;
void print_oneline_summary(ostream *out) const;
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 0644922ddb4..c14872d87ef 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -141,6 +141,31 @@ void PGMonitor::tick()
}
}
+ /* If we have deltas for pools, run through pgmap's 'per_pool_sum_delta' and
+ * clear any deltas that are old enough.
+ *
+ * Note that 'per_pool_sum_delta' keeps a pool id as key, and a pair containing
+ * the calc'ed stats delta and an absolute timestamp from when those stats were
+ * obtained -- the timestamp IS NOT a delta itself.
+ */
+ if (!pg_map.per_pool_sum_deltas.empty()) {
+ hash_map<uint64_t,pair<pool_stat_t,utime_t> >::iterator it;
+ for (it = pg_map.per_pool_sum_delta.begin();
+ it != pg_map.per_pool_sum_delta.end(); ) {
+ utime_t age = ceph_clock_now(g_ceph_context) - it->second.second;
+ if (age > 2*g_conf->mon_delta_reset_interval) {
+ dout(10) << " clearing pg_map delta for pool " << it->first
+ << " (" << age << " > " << g_conf->mon_delta_reset_interval
+ << " seconds old)" << dendl;
+ pg_map.per_pool_sum_deltas.erase(it->first);
+ pg_map.per_pool_sum_deltas_stamps.erase(it->first);
+ pg_map.per_pool_sum_delta.erase((it++)->first);
+ } else {
+ ++it;
+ }
+ }
+ }
+
dout(10) << pg_map << dendl;
}
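
The loop above erases expired entries from per_pool_sum_delta while iterating over it; that is safe only because the iterator is post-incremented before erase() invalidates the element it referred to. The same idiom in a self-contained form:

    #include <iostream>
    #include <unordered_map>

    int main() {
      std::unordered_map<int, int> age_by_pool = {{1, 10}, {2, 500}, {3, 20}};
      const int limit = 100;
      for (auto it = age_by_pool.begin(); it != age_by_pool.end(); ) {
        if (it->second > limit)
          age_by_pool.erase(it++);  // copy, advance, then erase the old copy
        else
          ++it;                     // nothing erased: plain advance
      }
      for (const auto &p : age_by_pool)
        std::cout << "pool " << p.first << " age " << p.second << "\n";
      return 0;   // pool 2 has been dropped
    }
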
@@ -401,6 +426,7 @@ void PGMonitor::apply_pgmap_delta(bufferlist& bl)
}
pool_stat_t pg_sum_old = pg_map.pg_sum;
+ hash_map<uint64_t, pool_stat_t> pg_pool_sum_old;
// pgs
bufferlist::iterator p = dirty_pgs.begin();
@@ -410,6 +436,10 @@ void PGMonitor::apply_pgmap_delta(bufferlist& bl)
dout(20) << " refreshing pg " << pgid << dendl;
bufferlist bl;
int r = mon->store->get(pgmap_pg_prefix, stringify(pgid), bl);
+
+ if (pg_pool_sum_old.count(pgid.pool()) == 0)
+ pg_pool_sum_old[pgid.pool()] = pg_map.pg_pool_sum[pgid.pool()];
+
if (r >= 0) {
pg_map.update_pg(pgid, bl);
} else {
@@ -432,7 +462,8 @@ void PGMonitor::apply_pgmap_delta(bufferlist& bl)
}
}
- pg_map.update_delta(g_ceph_context, inc_stamp, pg_sum_old);
+ pg_map.update_global_delta(g_ceph_context, inc_stamp, pg_sum_old);
+ pg_map.update_pool_deltas(g_ceph_context, inc_stamp, pg_pool_sum_old);
// ok, we're now on the new version
pg_map.version = v;
@@ -1831,7 +1862,7 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
// recovery
stringstream rss;
- pg_map.recovery_summary(NULL, &rss);
+ pg_map.overall_recovery_summary(NULL, &rss);
if (!rss.str().empty()) {
summary.push_back(make_pair(HEALTH_WARN, "recovery " + rss.str()));
if (detail)
@@ -1880,7 +1911,9 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
}
int average_objects_per_pg = pg_map.pg_sum.stats.sum.num_objects / pg_map.pg_stat.size();
- if (average_objects_per_pg > 0) {
+ if (average_objects_per_pg > 0 &&
+ pg_map.pg_sum.stats.sum.num_objects >= g_conf->mon_pg_warn_min_objects &&
+ p->second.stats.sum.num_objects >= g_conf->mon_pg_warn_min_pool_objects) {
int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num();
float ratio = (float)objects_per_pg / (float)average_objects_per_pg;
if (g_conf->mon_pg_warn_max_object_skew > 0 &&
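
This hunk is where the two options added in src/common/config_opts.h take effect: the per-pool object-skew warning is now suppressed when either the cluster or the pool holds too few objects for the ratio to be meaningful. A hedged sketch of the gating (simplified from the code above; parameter names mirror the config options):

    #include <iostream>

    bool should_warn_on_skew(long cluster_objects, long pool_objects,
                             double objects_per_pg, double avg_objects_per_pg,
                             double max_skew,         // mon_pg_warn_max_object_skew
                             long min_objects,        // mon_pg_warn_min_objects
                             long min_pool_objects) { // mon_pg_warn_min_pool_objects
      if (cluster_objects < min_objects || pool_objects < min_pool_objects)
        return false;  // too few objects for the skew ratio to mean much
      if (avg_objects_per_pg <= 0 || max_skew <= 0)
        return false;
      return objects_per_pg / avg_objects_per_pg > max_skew;
    }

    int main() {
      // A tiny pool never warns, however skewed its pg distribution looks:
      std::cout << std::boolalpha
                << should_warn_on_skew(20000, 50, 500, 10, 10.0, 10000, 1000)
                << "\n";  // false
      return 0;
    }
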
diff --git a/src/os/LevelDBStore.h b/src/os/LevelDBStore.h
index 89718ce1987..bc5b612a97a 100644
--- a/src/os/LevelDBStore.h
+++ b/src/os/LevelDBStore.h
@@ -329,13 +329,15 @@ public:
string fpath = path + '/' + n;
struct stat s;
int err = stat(fpath.c_str(), &s);
+ if (err < 0)
+ err = -errno;
// we may race against leveldb while reading files; this should only
// happen when those files are being updated, data is being shuffled
// and files get removed, in which case there's not much of a problem
// as we'll get to them next time around.
if ((err < 0) && (err != -ENOENT)) {
lderr(cct) << __func__ << " error obtaining stats for " << fpath
- << ": " << cpp_strerror(errno) << dendl;
+ << ": " << cpp_strerror(err) << dendl;
goto err;
}
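
The fix above addresses a classic pitfall: errno is only meaningful immediately after the failing call, so logging cpp_strerror(errno) later can report whatever some intervening call happened to set. A minimal illustration of capturing it right away:

    #include <cerrno>
    #include <cstdio>
    #include <cstring>
    #include <sys/stat.h>

    // stat() returns -1 on failure and sets errno; save it immediately.
    int stat_size(const char *path, long long *size) {
      struct stat s;
      if (stat(path, &s) < 0)
        return -errno;            // negative errno, Ceph's usual convention
      *size = (long long)s.st_size;
      return 0;
    }

    int main() {
      long long sz = 0;
      int err = stat_size("/no/such/file", &sz);
      if (err < 0 && err != -ENOENT)   // as above, a vanished file is fine
        std::fprintf(stderr, "stat failed: %s\n", std::strerror(-err));
      return 0;
    }
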
diff --git a/src/rgw/rgw_acl.cc b/src/rgw/rgw_acl.cc
index 3f99d72cd5b..02504524847 100644
--- a/src/rgw/rgw_acl.cc
+++ b/src/rgw/rgw_acl.cc
@@ -79,7 +79,7 @@ int RGWAccessControlPolicy::get_perm(string& id, int perm_mask) {
if ((perm & perm_mask) != perm_mask) {
perm |= acl.get_group_perm(ACL_GROUP_ALL_USERS, perm_mask);
- if (compare_group_name(id, ACL_GROUP_ALL_USERS) != 0) {
+ if (!compare_group_name(id, ACL_GROUP_ALL_USERS)) {
/* this is not the anonymous user */
perm |= acl.get_group_perm(ACL_GROUP_AUTHENTICATED_USERS, perm_mask);
}
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index acaa5deffee..5fbecf88cab 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -468,7 +468,7 @@ int main(int argc, const char **argv)
/* alternative default for module */
vector<const char *> def_args;
- def_args.push_back("--debug-rgw=20");
+ def_args.push_back("--debug-rgw=1/5");
def_args.push_back("--keyring=$rgw_data/keyring");
def_args.push_back("--log-file=/var/log/radosgw/$cluster-$name");
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 9f0a900f3d3..20ca8d8eb8f 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -82,18 +82,26 @@ void RGWDefaultRegionInfo::decode_json(JSONObj *obj) {
JSONDecoder::decode_json("default_region", default_region, obj);
}
-string RGWRegion::get_pool_name(CephContext *cct)
+int RGWRegion::get_pool_name(CephContext *cct, string *pool_name)
{
- string pool_name = cct->_conf->rgw_region_root_pool;
- if (pool_name.empty()) {
- pool_name = RGW_DEFAULT_REGION_ROOT_POOL;
+ *pool_name = cct->_conf->rgw_region_root_pool;
+ if (pool_name->empty()) {
+ *pool_name = RGW_DEFAULT_REGION_ROOT_POOL;
+ } else if ((*pool_name)[0] != '.') {
+ derr << "ERROR: region root pool name must start with a period" << dendl;
+ return -EINVAL;
}
- return pool_name;
+ return 0;
}
int RGWRegion::read_default(RGWDefaultRegionInfo& default_info)
{
- string pool_name = get_pool_name(cct);
+ string pool_name;
+
+ int ret = get_pool_name(cct, &pool_name);
+ if (ret < 0) {
+ return ret;
+ }
string oid = cct->_conf->rgw_default_region_info_oid;
if (oid.empty()) {
@@ -102,7 +110,7 @@ int RGWRegion::read_default(RGWDefaultRegionInfo& default_info)
rgw_bucket pool(pool_name.c_str());
bufferlist bl;
- int ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL);
+ ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL);
if (ret < 0)
return ret;
@@ -121,7 +129,10 @@ int RGWRegion::read_default(RGWDefaultRegionInfo& default_info)
int RGWRegion::set_as_default()
{
- string pool_name = get_pool_name(cct);
+ string pool_name;
+ int ret = get_pool_name(cct, &pool_name);
+ if (ret < 0)
+ return ret;
string oid = cct->_conf->rgw_default_region_info_oid;
if (oid.empty()) {
@@ -136,7 +147,7 @@ int RGWRegion::set_as_default()
::encode(default_info, bl);
- int ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), false, NULL, 0, NULL);
+ ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), false, NULL, 0, NULL);
if (ret < 0)
return ret;
@@ -185,7 +196,11 @@ int RGWRegion::init(CephContext *_cct, RGWRados *_store, bool setup_region)
int RGWRegion::read_info(const string& region_name)
{
- string pool_name = get_pool_name(cct);
+ string pool_name;
+ int ret = get_pool_name(cct, &pool_name);
+ if (ret < 0)
+ return ret;
+
rgw_bucket pool(pool_name.c_str());
bufferlist bl;
@@ -193,7 +208,7 @@ int RGWRegion::read_info(const string& region_name)
string oid = region_info_oid_prefix + name;
- int ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL);
+ ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL);
if (ret < 0) {
lderr(cct) << "failed reading region info from " << pool << ":" << oid << ": " << cpp_strerror(-ret) << dendl;
return ret;
@@ -246,7 +261,10 @@ int RGWRegion::create_default()
int RGWRegion::store_info(bool exclusive)
{
- string pool_name = get_pool_name(cct);
+ string pool_name;
+ int ret = get_pool_name(cct, &pool_name);
+ if (ret < 0)
+ return ret;
rgw_bucket pool(pool_name.c_str());
@@ -254,7 +272,7 @@ int RGWRegion::store_info(bool exclusive)
bufferlist bl;
::encode(*this, bl);
- int ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), exclusive, NULL, 0, NULL);
+ ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), exclusive, NULL, 0, NULL);
return ret;
}
@@ -293,13 +311,17 @@ void RGWZoneParams::init_default(RGWRados *store)
}
}
-string RGWZoneParams::get_pool_name(CephContext *cct)
+int RGWZoneParams::get_pool_name(CephContext *cct, string *pool_name)
{
- string pool_name = cct->_conf->rgw_zone_root_pool;
- if (pool_name.empty()) {
- pool_name = RGW_DEFAULT_ZONE_ROOT_POOL;
+ *pool_name = cct->_conf->rgw_zone_root_pool;
+ if (pool_name->empty()) {
+ *pool_name = RGW_DEFAULT_ZONE_ROOT_POOL;
+ } else if ((*pool_name)[0] != '.') {
+ derr << "ERROR: zone root pool name must start with a period" << dendl;
+ return -EINVAL;
}
- return pool_name;
+
+ return 0;
}
void RGWZoneParams::init_name(CephContext *cct, RGWRegion& region)
@@ -319,13 +341,16 @@ int RGWZoneParams::init(CephContext *cct, RGWRados *store, RGWRegion& region)
{
init_name(cct, region);
- string pool_name = get_pool_name(cct);
+ string pool_name;
+ int ret = get_pool_name(cct, &pool_name);
+ if (ret < 0)
+ return ret;
rgw_bucket pool(pool_name.c_str());
bufferlist bl;
string oid = zone_info_oid_prefix + name;
- int ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL);
+ ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL);
if (ret < 0)
return ret;
@@ -344,14 +369,17 @@ int RGWZoneParams::store_info(CephContext *cct, RGWRados *store, RGWRegion& regi
{
init_name(cct, region);
- string pool_name = get_pool_name(cct);
+ string pool_name;
+ int ret = get_pool_name(cct, &pool_name);
+ if (ret < 0)
+ return ret;
rgw_bucket pool(pool_name.c_str());
string oid = zone_info_oid_prefix + name;
bufferlist bl;
::encode(*this, bl);
- int ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), false, NULL, 0, NULL);
+ ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), false, NULL, 0, NULL);
return ret;
}
@@ -1032,14 +1060,20 @@ int RGWRados::list_raw_prefixed_objs(string pool_name, const string& prefix, lis
int RGWRados::list_regions(list<string>& regions)
{
- string pool_name = RGWRegion::get_pool_name(cct);
+ string pool_name;
+ int ret = RGWRegion::get_pool_name(cct, &pool_name);
+ if (ret < 0)
+ return ret;
return list_raw_prefixed_objs(pool_name, region_info_oid_prefix, regions);
}
int RGWRados::list_zones(list<string>& zones)
{
- string pool_name = RGWZoneParams::get_pool_name(cct);
+ string pool_name;
+ int ret = RGWZoneParams::get_pool_name(cct, &pool_name);
+ if (ret < 0)
+ return ret;
return list_raw_prefixed_objs(pool_name, zone_info_oid_prefix, zones);
}
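
The recurring change through this file is a single refactor: get_pool_name() used to return a bare string and had no way to reject a bad name, so it now returns an int error code and hands the name back through an out-parameter, with every caller threading ret through instead of declaring it fresh. A compact sketch of the pattern (validation rule taken from the diff; the function name and '.rgw.root' default are illustrative):

    #include <cerrno>
    #include <iostream>
    #include <string>

    // Return 0 and fill *pool_name on success, or a negative errno code.
    static int get_root_pool(const std::string &configured,
                             std::string *pool_name) {
      *pool_name = configured.empty() ? ".rgw.root" : configured;
      if ((*pool_name)[0] != '.')
        return -EINVAL;   // root pool names must start with a period
      return 0;
    }

    int main() {
      std::string pool;
      int ret = get_root_pool("rgw-root", &pool);  // missing the leading '.'
      if (ret < 0)
        std::cerr << "rejected pool name '" << pool << "'\n";
      return 0;
    }
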
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index 52b898123d4..b37652d9f3f 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -433,7 +433,7 @@ struct RGWZoneParams {
map<string, RGWZonePlacementInfo> placement_pools;
- static string get_pool_name(CephContext *cct);
+ static int get_pool_name(CephContext *cct, string *pool_name);
void init_name(CephContext *cct, RGWRegion& region);
int init(CephContext *cct, RGWRados *store, RGWRegion& region);
void init_default(RGWRados *store);
@@ -622,7 +622,7 @@ struct RGWRegion {
int set_as_default();
int equals(const string& other_region);
- static string get_pool_name(CephContext *cct);
+ static int get_pool_name(CephContext *cct, string *pool_name);
void dump(Formatter *f) const;
void decode_json(JSONObj *obj);
diff --git a/src/test/Makefile.am b/src/test/Makefile.am
index 59b4d89e930..84a228f1d4b 100644
--- a/src/test/Makefile.am
+++ b/src/test/Makefile.am
@@ -856,11 +856,6 @@ ceph_test_keyvaluedb_iterators_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
ceph_test_keyvaluedb_iterators_CXXFLAGS = $(UNITTEST_CXXFLAGS)
bin_DEBUGPROGRAMS += ceph_test_keyvaluedb_iterators
-ceph_test_store_tool_SOURCES = test/ObjectMap/test_store_tool/test_store_tool.cc
-ceph_test_store_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL)
-ceph_test_store_tool_CXXFLAGS = $(UNITTEST_CXXFLAGS)
-bin_DEBUGPROGRAMS += ceph_test_store_tool
-
ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
bin_DEBUGPROGRAMS += ceph_test_cfuse_cache_invalidate
diff --git a/src/test/filestore/run_seed_to.sh b/src/test/filestore/run_seed_to.sh
index fdf56141e12..d5bb671138c 100755
--- a/src/test/filestore/run_seed_to.sh
+++ b/src/test/filestore/run_seed_to.sh
@@ -246,13 +246,13 @@ do
do_rm $tmp_name_a $tmp_name_a.fail $tmp_name_a.recover
$v ceph_test_filestore_idempotent_sequence run-sequence-to $to \
$tmp_name_a $tmp_name_a/journal \
- --filestore-xattr-use-omap --test-seed $seed --osd-journal-size 100 \
+ --test-seed $seed --osd-journal-size 100 \
--filestore-kill-at $killat $tmp_opts_a \
--log-file $tmp_name_a.fail --debug-filestore 20 || true
stop_at=`ceph_test_filestore_idempotent_sequence get-last-op \
$tmp_name_a $tmp_name_a/journal \
- --filestore-xattr-use-omap --log-file $tmp_name_a.recover \
+ --log-file $tmp_name_a.recover \
--debug-filestore 20 --debug-journal 20`
if [[ "`expr $stop_at - $stop_at 2>/dev/null`" != "0" ]]; then
@@ -265,12 +265,11 @@ do
do_rm $tmp_name_b $tmp_name_b.clean
$v ceph_test_filestore_idempotent_sequence run-sequence-to \
$stop_at $tmp_name_b $tmp_name_b/journal \
- --filestore-xattr-use-omap --test-seed $seed --osd-journal-size 100 \
+ --test-seed $seed --osd-journal-size 100 \
--log-file $tmp_name_b.clean --debug-filestore 20 $tmp_opts_b
if $v ceph_test_filestore_idempotent_sequence diff \
- $tmp_name_a $tmp_name_a/journal $tmp_name_b $tmp_name_b/journal \
- --filestore-xattr-use-omap; then
+ $tmp_name_a $tmp_name_a/journal $tmp_name_b $tmp_name_b/journal ; then
echo OK
else
echo "FAIL"
diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc
index 7158f50a74a..842f9d2bca3 100644
--- a/src/test/osd/TestRados.cc
+++ b/src/test/osd/TestRados.cc
@@ -111,22 +111,23 @@ private:
return new SnapCreateOp(m_op, &context, m_stats);
case TEST_OP_SNAP_REMOVE:
- if (context.snaps.empty()) {
+ if (context.snaps.size() <= context.snaps_in_use.size()) {
return NULL;
- } else {
+ }
+ while (true) {
int snap = rand_choose(context.snaps)->first;
+ if (context.snaps_in_use.count(snap))
+ continue; // in use; try again!
cout << "snap_remove snap " << snap << std::endl;
return new SnapRemoveOp(m_op, &context, snap, m_stats);
}
case TEST_OP_ROLLBACK:
- if (context.snaps.size() <= context.snaps_in_use.size()) {
+ if (context.snaps.empty()) {
return NULL;
}
- while (true) {
+ {
int snap = rand_choose(context.snaps)->first;
- if (context.snaps_in_use.count(snap))
- continue; // in use; try again!
string oid = *(rand_choose(context.oid_not_in_use));
cout << "rollback oid " << oid << " to " << snap << std::endl;
return new RollbackOp(m_op, &context, oid, snap);
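
This change swaps logic that had been attached to the wrong cases: it is snap removal, not rollback, that must avoid snaps currently in use, so the guard and the retry loop move to TEST_OP_SNAP_REMOVE. A self-contained sketch of that selection, assuming (as the test does) that the in-use set is a subset of the snap map's keys:

    #include <cstdlib>
    #include <iostream>
    #include <iterator>
    #include <map>
    #include <set>

    // Pick a random snap that is not in use; return -1 if none can exist.
    int pick_free_snap(const std::map<int, int> &snaps,
                       const std::set<int> &in_use) {
      if (snaps.size() <= in_use.size())
        return -1;                  // no free snap: bail out, don't spin
      while (true) {
        auto it = snaps.begin();
        std::advance(it, std::rand() % snaps.size());  // crude rand_choose
        if (!in_use.count(it->first))
          return it->first;         // found one that is not in use
      }
    }

    int main() {
      std::map<int, int> snaps = {{1, 0}, {2, 0}, {3, 0}};
      std::set<int> in_use = {1, 3};
      std::cout << "snap_remove snap " << pick_free_snap(snaps, in_use)
                << "\n";            // always picks 2 here
      return 0;
    }
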
diff --git a/src/tools/Makefile.am b/src/tools/Makefile.am
index 4b8da77951a..89417014dd4 100644
--- a/src/tools/Makefile.am
+++ b/src/tools/Makefile.am
@@ -6,6 +6,12 @@ ceph_monstore_tool_SOURCES = tools/ceph-monstore-tool.cc
ceph_monstore_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL) -lboost_program_options
bin_DEBUGPROGRAMS += ceph-monstore-tool
+ceph_kvstore_tool_SOURCES = tools/ceph-kvstore-tool.cc
+ceph_kvstore_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL)
+ceph_kvstore_tool_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+bin_DEBUGPROGRAMS += ceph-kvstore-tool
+
+
ceph_filestore_dump_SOURCES = tools/ceph-filestore-dump.cc
ceph_filestore_dump_LDADD = $(LIBOSD) $(LIBOS) $(CEPH_GLOBAL) -lboost_program_options
if LINUX
diff --git a/src/test/ObjectMap/test_store_tool/test_store_tool.cc b/src/tools/ceph-kvstore-tool.cc
index 8fcf3f30e82..e07391d5c51 100644
--- a/src/test/ObjectMap/test_store_tool/test_store_tool.cc
+++ b/src/tools/ceph-kvstore-tool.cc
@@ -25,21 +25,25 @@
#include "common/safe_io.h"
#include "common/config.h"
#include "common/strtol.h"
+#include "include/stringify.h"
using namespace std;
class StoreTool
{
boost::scoped_ptr<KeyValueDB> db;
+ string store_path;
public:
- StoreTool(const string &path) {
- LevelDBStore *db_ptr = new LevelDBStore(g_ceph_context, path);
+ StoreTool(const string &path) : store_path(path) {
+ LevelDBStore *db_ptr = new LevelDBStore(g_ceph_context, store_path);
assert(!db_ptr->open(std::cerr));
db.reset(db_ptr);
}
- void list(const string &prefix, const bool do_crc) {
+ uint32_t traverse(const string &prefix,
+ const bool do_crc,
+ ostream *out) {
KeyValueDB::WholeSpaceIterator iter = db->get_iterator();
if (prefix.empty())
@@ -47,18 +51,36 @@ class StoreTool
else
iter->seek_to_first(prefix);
+ uint32_t crc = -1;
+
while (iter->valid()) {
pair<string,string> rk = iter->raw_key();
if (!prefix.empty() && (rk.first != prefix))
- break;
+ break;
- std::cout << rk.first << ":" << rk.second;
+ if (out)
+ *out << rk.first << ":" << rk.second;
if (do_crc) {
- std::cout << " (" << iter->value().crc32c(0) << ")";
+ bufferlist bl;
+ bl.append(rk.first);
+ bl.append(rk.second);
+ bl.append(iter->value());
+
+ crc = bl.crc32c(crc);
+ if (out) {
+ *out << " (" << bl.crc32c(0) << ")";
+ }
}
- std::cout << std::endl;
+ if (out)
+ *out << std::endl;
iter->next();
}
+
+ return crc;
+ }
+
+ void list(const string &prefix, const bool do_crc) {
+ traverse(prefix, do_crc, &std::cout);
}
bool exists(const string &prefix) {
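
The reworked traverse() folds each key/value pair into a running checksum (crc = bl.crc32c(crc)), so the store-crc command added further down summarizes the whole store in iteration order. An analogous stand-alone illustration using zlib's CRC-32 in place of Ceph's crc32c (the tool itself seeds with -1 rather than zlib's initial value; build with -lz):

    #include <cstring>
    #include <iostream>
    #include <zlib.h>

    int main() {
      const char *pairs[][2] = {{"prefix1:key1", "value1"},
                                {"prefix1:key2", "value2"}};
      uLong crc = crc32(0L, Z_NULL, 0);       // zlib's initial CRC
      for (const auto &kv : pairs) {
        // fold key, then value, into the running checksum
        crc = crc32(crc, (const Bytef *)kv[0], std::strlen(kv[0]));
        crc = crc32(crc, (const Bytef *)kv[1], std::strlen(kv[1]));
      }
      std::cout << "store crc " << crc << "\n";
      return 0;
    }
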
@@ -118,6 +140,70 @@ class StoreTool
return (ret == 0);
}
+
+ int copy_store_to(const string &other_path, const int num_keys_per_tx) {
+
+ if (num_keys_per_tx <= 0) {
+ std::cerr << "must specify a number of keys/tx > 0" << std::endl;
+ return -EINVAL;
+ }
+
+ // open or create a leveldb store at @p other_path
+ LevelDBStore other(g_ceph_context, other_path);
+ int err = other.create_and_open(std::cerr);
+ if (err < 0)
+ return err;
+
+ KeyValueDB::WholeSpaceIterator it = db->get_iterator();
+ it->seek_to_first();
+ uint64_t total_keys = 0;
+ uint64_t total_size = 0;
+ uint64_t total_txs = 0;
+
+ utime_t started_at = ceph_clock_now(g_ceph_context);
+
+ do {
+ int num_keys = 0;
+
+ KeyValueDB::Transaction tx = other.get_transaction();
+
+
+ while (it->valid() && num_keys < num_keys_per_tx) {
+ pair<string,string> k = it->raw_key();
+ bufferlist v = it->value();
+ tx->set(k.first, k.second, v);
+
+ num_keys ++;
+ total_size += v.length();
+
+ it->next();
+ }
+
+ total_txs ++;
+ total_keys += num_keys;
+
+ if (num_keys > 0)
+ other.submit_transaction_sync(tx);
+
+ utime_t cur_duration = ceph_clock_now(g_ceph_context) - started_at;
+ std::cout << "ts = " << cur_duration << "s, copied " << total_keys
+ << " keys so far (" << stringify(si_t(total_size)) << ")"
+ << std::endl;
+
+ } while (it->valid());
+
+ utime_t time_taken = ceph_clock_now(g_ceph_context) - started_at;
+
+ std::cout << "summary:" << std::endl;
+ std::cout << " copied " << total_keys << " keys" << std::endl;
+ std::cout << " used " << total_txs << " transactions" << std::endl;
+ std::cout << " total size " << stringify(si_t(total_size)) << std::endl;
+ std::cout << " from '" << store_path << "' to '" << other_path << "'"
+ << std::endl;
+ std::cout << " duration " << time_taken << " seconds" << std::endl;
+
+ return 0;
+ }
};
void usage(const char *pname)
@@ -132,6 +218,8 @@ void usage(const char *pname)
<< " crc <prefix> <key>\n"
<< " get-size\n"
<< " set <prefix> <key> [ver <N>|in <file>]\n"
+ << " store-copy <path> [num-keys-per-tx]\n"
+ << " store-crc <path>\n"
<< std::endl;
}
@@ -155,8 +243,6 @@ int main(int argc, const char *argv[])
string path(args[0]);
string cmd(args[1]);
- std::cout << "path: " << path << " cmd " << cmd << std::endl;
-
StoreTool st(path);
if (cmd == "list" || cmd == "list-crc") {
@@ -260,6 +346,30 @@ int main(int argc, const char *argv[])
<< prefix << "," << key << ")" << std::endl;
return 1;
}
+ } else if (cmd == "store-copy") {
+ int num_keys_per_tx = 128; // magic number that just feels right.
+ if (argc < 4) {
+ usage(argv[0]);
+ return 1;
+ } else if (argc > 4) {
+ string err;
+ num_keys_per_tx = strict_strtol(argv[4], 10, &err);
+ if (!err.empty()) {
+ std::cerr << "invalid num_keys_per_tx: " << err << std::endl;
+ return 1;
+ }
+ }
+
+ int ret = st.copy_store_to(argv[3], num_keys_per_tx);
+ if (ret < 0) {
+ std::cerr << "error copying store to path '" << argv[3]
+ << "': " << cpp_strerror(ret) << std::endl;
+ return 1;
+ }
+
+ } else if (cmd == "store-crc") {
+ uint32_t crc = st.traverse(string(), true, NULL);
+ std::cout << "store at '" << path << "' crc " << crc << std::endl;
} else {
std::cerr << "Unrecognized command: " << cmd << std::endl;