diff options
author | Sage Weil <sage@inktank.com> | 2013-10-16 14:13:03 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-10-16 14:13:03 -0700 |
commit | 5c476277a61902a002401d1655789a87037bc5ac (patch) | |
tree | bccc17e80dbb0686f84a715ea0086613ac37e694 | |
parent | a6a3f50dceb92dd7c5eb0f6e956e491ed16e94e6 (diff) | |
parent | 14e91bf1246eed944adf8bbad627c030654da142 (diff) | |
download | ceph-5c476277a61902a002401d1655789a87037bc5ac.tar.gz |
Merge remote-tracking branch 'gh/next'
-rw-r--r-- | ceph.spec.in | 2 | ||||
-rw-r--r-- | debian/ceph-test.install | 2 | ||||
-rw-r--r-- | src/common/config_opts.h | 2 | ||||
-rw-r--r-- | src/mon/MonCommands.h | 6 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 99 | ||||
-rw-r--r-- | src/mon/PGMap.cc | 299 | ||||
-rw-r--r-- | src/mon/PGMap.h | 73 | ||||
-rw-r--r-- | src/mon/PGMonitor.cc | 39 | ||||
-rw-r--r-- | src/os/LevelDBStore.h | 4 | ||||
-rw-r--r-- | src/rgw/rgw_acl.cc | 2 | ||||
-rw-r--r-- | src/rgw/rgw_main.cc | 2 | ||||
-rw-r--r-- | src/rgw/rgw_rados.cc | 82 | ||||
-rw-r--r-- | src/rgw/rgw_rados.h | 4 | ||||
-rw-r--r-- | src/test/Makefile.am | 5 | ||||
-rwxr-xr-x | src/test/filestore/run_seed_to.sh | 9 | ||||
-rw-r--r-- | src/test/osd/TestRados.cc | 13 | ||||
-rw-r--r-- | src/tools/Makefile.am | 6 | ||||
-rw-r--r-- | src/tools/ceph-kvstore-tool.cc (renamed from src/test/ObjectMap/test_store_tool/test_store_tool.cc) | 128 |
18 files changed, 642 insertions, 135 deletions
diff --git a/ceph.spec.in b/ceph.spec.in index bcb1214cc93..ee4b84ef090 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -665,12 +665,12 @@ fi %{_bindir}/ceph_test_rados_watch_notify %{_bindir}/ceph_test_signal_handlers %{_bindir}/ceph_test_snap_mapper -%{_bindir}/ceph_test_store_tool %{_bindir}/ceph_test_timers %{_bindir}/ceph_tpbench %{_bindir}/ceph_xattr_bench %{_bindir}/ceph-monstore-tool %{_bindir}/ceph-osdomap-tool +%{_bindir}/ceph-kvstore-tool %files -n libcephfs_jni1 %defattr(-,root,root,-) diff --git a/debian/ceph-test.install b/debian/ceph-test.install index c5a5e0a9774..237a05850be 100644 --- a/debian/ceph-test.install +++ b/debian/ceph-test.install @@ -67,7 +67,6 @@ usr/bin/ceph_test_rados_watch_notify usr/bin/ceph_test_rewrite_latency usr/bin/ceph_test_signal_handlers usr/bin/ceph_test_snap_mapper -usr/bin/ceph_test_store_tool usr/bin/ceph_test_stress_watch usr/bin/ceph_test_timers usr/bin/ceph_test_trans @@ -75,4 +74,5 @@ usr/bin/ceph_tpbench usr/bin/ceph_xattr_bench usr/bin/ceph-monstore-tool usr/bin/ceph-osdomap-tool +usr/bin/ceph-kvstore-tool usr/share/java/libcephfs-test.jar diff --git a/src/common/config_opts.h b/src/common/config_opts.h index b419dec88b5..0b3938ecb9e 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -160,6 +160,8 @@ OPTION(mon_pg_create_interval, OPT_FLOAT, 30.0) // no more than every 30s OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info) OPTION(mon_pg_warn_min_per_osd, OPT_INT, 20) // min # pgs per (in) osd before we warn the admin OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew few average in objects per pg +OPTION(mon_pg_warn_min_objects, OPT_INT, 10000) // do not warn below this object # +OPTION(mon_pg_warn_min_pool_objects, OPT_INT, 1000) // do not warn on pools below this object # OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full" OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full OPTION(mon_globalid_prealloc, OPT_INT, 100) // how many globalids to prealloc diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index ae6bffe0d7d..5a6ca6a471d 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -112,7 +112,7 @@ COMMAND("pg send_pg_creates", "trigger pg creates to be issued",\ "pg", "rw", "cli,rest") COMMAND("pg dump " \ "name=dumpcontents,type=CephChoices,strings=all|summary|sum|delta|pools|osds|pgs|pgs_brief,n=N,req=false", \ - "show human-readable versions of pg map", "pg", "r", "cli,rest") + "show human-readable versions of pg map (only 'all' valid with plain)", "pg", "r", "cli,rest") COMMAND("pg dump_json " \ "name=dumpcontents,type=CephChoices,strings=all|summary|sum|pools|osds|pgs,n=N,req=false", \ "show human-readable version of pg map in json only",\ @@ -518,6 +518,10 @@ COMMAND("osd pool set-quota " \ "name=field,type=CephChoices,strings=max_objects|max_bytes " \ "name=val,type=CephString", "set object or byte limit on pool", "osd", "rw", "cli,rest") +COMMAND("osd pool stats " \ + "name=name,type=CephString,req=false", + "obtain stats from all pools, or from specified pool", + "osd", "r", "cli,rest") COMMAND("osd reweight-by-utilization " \ "name=oload,type=CephInt,range=100,req=false", \ "reweight OSDs by utilization [overload-percentage-for-consideration, default 120]", \ diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 425375b29e2..83e85847045 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2296,6 +2296,105 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) } r = 0; + } else if (prefix == "osd pool stats") { + string pool_name; + cmd_getval(g_ceph_context, cmdmap, "name", pool_name); + + PGMap& pg_map = mon->pgmon()->pg_map; + + int64_t poolid = -ENOENT; + bool one_pool = false; + if (!pool_name.empty()) { + poolid = osdmap.lookup_pg_pool_name(pool_name); + if (poolid < 0) { + assert(poolid == -ENOENT); + ss << "unrecognized pool '" << pool_name << "'"; + r = -ENOENT; + goto reply; + } + one_pool = true; + } + + stringstream rs; + + if (f) + f->open_array_section("pool_stats"); + if (osdmap.get_pools().size() == 0) { + if (!f) + ss << "there are no pools!"; + goto stats_out; + } + + for (map<int64_t,pg_pool_t>::const_iterator it = osdmap.get_pools().begin(); + it != osdmap.get_pools().end(); + ++it) { + + if (!one_pool) + poolid = it->first; + + pool_name = osdmap.get_pool_name(poolid); + + if (f) { + f->open_object_section("pool"); + f->dump_string("pool_name", pool_name.c_str()); + f->dump_int("pool_id", poolid); + f->open_object_section("recovery"); + } + + stringstream rss, tss; + pg_map.pool_recovery_summary(f.get(), &rss, poolid); + if (!f && !rss.str().empty()) + tss << " " << rss.str() << "\n"; + + if (f) { + f->close_section(); + f->open_object_section("recovery_rate"); + } + + rss.clear(); + rss.str(""); + + pg_map.pool_recovery_rate_summary(f.get(), &rss, poolid); + if (!f && !rss.str().empty()) + tss << " recovery io " << rss.str() << "\n"; + + if (f) { + f->close_section(); + f->open_object_section("client_io_rate"); + } + + rss.clear(); + rss.str(""); + + pg_map.pool_client_io_rate_summary(f.get(), &rss, poolid); + if (!f && !rss.str().empty()) + tss << " client io " << rss.str() << "\n"; + + if (f) { + f->close_section(); + f->close_section(); + } else { + rs << "pool " << pool_name << " id " << poolid << "\n"; + if (!tss.str().empty()) + rs << tss.str() << "\n"; + else + rs << " nothing is going on\n\n"; + } + + if (one_pool) + break; + } + +stats_out: + if (f) { + f->close_section(); + f->flush(rdata); + } else { + rdata.append(rs.str()); + } + rdata.append("\n"); + r = 0; + } else if (prefix == "osd crush rule list" || prefix == "osd crush rule ls") { string format; diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index ea70bbd61c3..39cb30f97c8 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -180,6 +180,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) stamp = inc.stamp; pool_stat_t pg_sum_old = pg_sum; + hash_map<uint64_t, pool_stat_t> pg_pool_sum_old; bool ratios_changed = false; if (inc.full_ratio != full_ratio && inc.full_ratio != -1) { @@ -199,6 +200,9 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) const pg_t &update_pg(p->first); const pg_stat_t &update_stat(p->second); + if (pg_pool_sum_old.count(update_pg.pool()) == 0) + pg_pool_sum_old[update_pg.pool()] = pg_pool_sum[update_pg.pool()]; + hash_map<pg_t,pg_stat_t>::iterator t = pg_stat.find(update_pg); if (t == pg_stat.end()) { hash_map<pg_t,pg_stat_t>::value_type v(update_pg, update_stat); @@ -216,7 +220,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) ++p) { int osd = p->first; const osd_stat_t &new_stats(p->second); - + hash_map<int32_t,osd_stat_t>::iterator t = osd_stat.find(osd); if (t == osd_stat.end()) { hash_map<int32_t,osd_stat_t>::value_type v(osd, new_stats); @@ -229,7 +233,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) osd_epochs.insert(*(inc.get_osd_epochs().find(osd))); stat_osd_add(new_stats); - + // adjust [near]full status register_nearfull_status(osd, new_stats); } @@ -243,7 +247,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) pg_stat.erase(s); } } - + for (set<int>::iterator p = inc.get_osd_stat_rm().begin(); p != inc.get_osd_stat_rm().end(); ++p) { @@ -270,7 +274,9 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) stamp_delta -= pg_sum_deltas.front().second; pg_sum_deltas.pop_front(); } - + + update_pool_deltas(cct, inc.stamp, pg_pool_sum_old); + if (inc.osdmap_epoch) last_osdmap_epoch = inc.osdmap_epoch; if (inc.pg_scan) @@ -780,54 +786,59 @@ void PGMap::print_osd_perf_stats(std::ostream *ss) const (*ss) << tab; } -void PGMap::recovery_summary(Formatter *f, ostream *out) const +void PGMap::recovery_summary(Formatter *f, ostream *out, + pool_stat_t delta_sum) const { bool first = true; - if (pg_sum.stats.sum.num_objects_degraded) { - double pc = (double)pg_sum.stats.sum.num_objects_degraded / (double)pg_sum.stats.sum.num_object_copies * (double)100.0; + if (delta_sum.stats.sum.num_objects_degraded) { + double pc = (double)delta_sum.stats.sum.num_objects_degraded / + (double)delta_sum.stats.sum.num_object_copies * (double)100.0; char b[20]; snprintf(b, sizeof(b), "%.3lf", pc); if (f) { - f->dump_unsigned("degraded_objects", pg_sum.stats.sum.num_objects_degraded); - f->dump_unsigned("degraded_total", pg_sum.stats.sum.num_object_copies); + f->dump_unsigned("degraded_objects", delta_sum.stats.sum.num_objects_degraded); + f->dump_unsigned("degraded_total", delta_sum.stats.sum.num_object_copies); f->dump_string("degrated_ratio", b); } else { - *out << pg_sum.stats.sum.num_objects_degraded - << "/" << pg_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)"; + *out << delta_sum.stats.sum.num_objects_degraded + << "/" << delta_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)"; } first = false; } - if (pg_sum.stats.sum.num_objects_unfound) { - double pc = (double)pg_sum.stats.sum.num_objects_unfound / (double)pg_sum.stats.sum.num_objects * (double)100.0; + if (delta_sum.stats.sum.num_objects_unfound) { + double pc = (double)delta_sum.stats.sum.num_objects_unfound / + (double)delta_sum.stats.sum.num_objects * (double)100.0; char b[20]; snprintf(b, sizeof(b), "%.3lf", pc); if (f) { - f->dump_unsigned("unfound_objects", pg_sum.stats.sum.num_objects_unfound); - f->dump_unsigned("unfound_total", pg_sum.stats.sum.num_objects); + f->dump_unsigned("unfound_objects", delta_sum.stats.sum.num_objects_unfound); + f->dump_unsigned("unfound_total", delta_sum.stats.sum.num_objects); f->dump_string("unfound_ratio", b); } else { if (!first) *out << "; "; - *out << pg_sum.stats.sum.num_objects_unfound - << "/" << pg_sum.stats.sum.num_objects << " unfound (" << b << "%)"; + *out << delta_sum.stats.sum.num_objects_unfound + << "/" << delta_sum.stats.sum.num_objects << " unfound (" << b << "%)"; } first = false; } } -void PGMap::recovery_rate_summary(Formatter *f, ostream *out) const +void PGMap::recovery_rate_summary(Formatter *f, ostream *out, + pool_stat_t delta_sum, + utime_t delta_stamp) const { // make non-negative; we can get negative values if osds send // uncommitted stats and then "go backward" or if they are just // buggy/wrong. - pool_stat_t pos_delta = pg_sum_delta; + pool_stat_t pos_delta = delta_sum; pos_delta.floor(0); if (pos_delta.stats.sum.num_objects_recovered || pos_delta.stats.sum.num_bytes_recovered || pos_delta.stats.sum.num_keys_recovered) { - int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)stamp_delta; - int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)stamp_delta; - int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)stamp_delta; + int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)delta_stamp; + int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)delta_stamp; + int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)delta_stamp; if (f) { f->dump_int("recovering_objects_per_sec", objps); f->dump_int("recovering_bytes_per_sec", bps); @@ -841,24 +852,194 @@ void PGMap::recovery_rate_summary(Formatter *f, ostream *out) const } } -void PGMap::update_delta(CephContext *cct, utime_t inc_stamp, pool_stat_t& pg_sum_old) +void PGMap::overall_recovery_rate_summary(Formatter *f, ostream *out) const +{ + recovery_rate_summary(f, out, pg_sum_delta, stamp_delta); +} + +void PGMap::overall_recovery_summary(Formatter *f, ostream *out) const +{ + recovery_summary(f, out, pg_sum); +} + +void PGMap::pool_recovery_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const { + hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p = + per_pool_sum_delta.find(poolid); + if (p == per_pool_sum_delta.end()) + return; + hash_map<uint64_t,utime_t>::const_iterator ts = + per_pool_sum_deltas_stamps.find(p->first); + assert(ts != per_pool_sum_deltas_stamps.end()); + recovery_rate_summary(f, out, p->second.first, ts->second); +} + +void PGMap::pool_recovery_summary(Formatter *f, ostream *out, + uint64_t poolid) const +{ + hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p = + per_pool_sum_delta.find(poolid); + if (p == per_pool_sum_delta.end()) + return; + recovery_summary(f, out, p->second.first); +} + +void PGMap::client_io_rate_summary(Formatter *f, ostream *out, + pool_stat_t delta_sum, + utime_t delta_stamp) const +{ + pool_stat_t pos_delta = delta_sum; + pos_delta.floor(0); + if (pos_delta.stats.sum.num_rd || + pos_delta.stats.sum.num_wr) { + if (pos_delta.stats.sum.num_rd) { + int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)delta_stamp; + if (f) { + f->dump_int("read_bytes_sec", rd); + } else { + *out << pretty_si_t(rd) << "B/s rd, "; + } + } + if (pos_delta.stats.sum.num_wr) { + int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)delta_stamp; + if (f) { + f->dump_int("write_bytes_sec", wr); + } else { + *out << pretty_si_t(wr) << "B/s wr, "; + } + } + int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)delta_stamp; + if (f) { + f->dump_int("op_per_sec", iops); + } else { + *out << pretty_si_t(iops) << "op/s"; + } + } +} + +void PGMap::overall_client_io_rate_summary(Formatter *f, ostream *out) const +{ + client_io_rate_summary(f, out, pg_sum_delta, stamp_delta); +} + +void PGMap::pool_client_io_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const +{ + hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p = + per_pool_sum_delta.find(poolid); + if (p == per_pool_sum_delta.end()) + return; + hash_map<uint64_t,utime_t>::const_iterator ts = + per_pool_sum_deltas_stamps.find(p->first); + assert(ts != per_pool_sum_deltas_stamps.end()); + client_io_rate_summary(f, out, p->second.first, ts->second); +} + +/** + * update aggregated delta + * + * @param cct ceph context + * @param ts Timestamp for the stats being delta'ed + * @param old_pool_sum Previous stats sum + * @param last_ts Last timestamp for pool + * @param result_pool_sum Resulting stats + * @param result_ts_delta Resulting timestamp delta + * @param delta_avg_list List of last N computed deltas, used to average + */ +void PGMap::update_delta(CephContext *cct, + const utime_t ts, + const pool_stat_t& old_pool_sum, + utime_t *last_ts, + const pool_stat_t& current_pool_sum, + pool_stat_t *result_pool_delta, + utime_t *result_ts_delta, + list<pair<pool_stat_t,utime_t> > *delta_avg_list) +{ + /* @p ts is the timestamp we want to associate with the data + * in @p old_pool_sum, and on which we will base ourselves to + * calculate the delta, stored in 'delta_t'. + */ utime_t delta_t; - delta_t = inc_stamp; - delta_t -= stamp; - stamp = inc_stamp; + delta_t = ts; // start with the provided timestamp + delta_t -= *last_ts; // take the last timestamp we saw + *last_ts = ts; // @p ts becomes the last timestamp we saw // calculate a delta, and average over the last 2 deltas. - pool_stat_t d = pg_sum; - d.stats.sub(pg_sum_old.stats); - pg_sum_deltas.push_back(make_pair(d, delta_t)); - stamp_delta += delta_t; + /* start by taking a copy of our current @p result_pool_sum, and by + * taking out the stats from @p old_pool_sum. This generates a stats + * delta. Stash this stats delta in @p delta_avg_list, along with the + * timestamp delta for these results. + */ + pool_stat_t d = current_pool_sum; + d.stats.sub(old_pool_sum.stats); + delta_avg_list->push_back(make_pair(d,delta_t)); + *result_ts_delta += delta_t; + + /* Aggregate current delta, and take out the last seen delta (if any) to + * average it out. + */ + result_pool_delta->stats.add(d.stats); + size_t s = MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1); + if (delta_avg_list->size() > s) { + result_pool_delta->stats.sub(delta_avg_list->front().first.stats); + *result_ts_delta -= delta_avg_list->front().second; + delta_avg_list->pop_front(); + } +} - pg_sum_delta.stats.add(d.stats); - if (pg_sum_deltas.size() > (std::list< pair<pool_stat_t, utime_t> >::size_type)MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1)) { - pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats); - stamp_delta -= pg_sum_deltas.front().second; - pg_sum_deltas.pop_front(); +/** + * update aggregated delta + * + * @param cct ceph context + * @param ts Timestamp + * @param pg_sum_old Old pg_sum + */ +void PGMap::update_global_delta(CephContext *cct, + const utime_t ts, const pool_stat_t& pg_sum_old) +{ + update_delta(cct, ts, pg_sum_old, &stamp, pg_sum, &pg_sum_delta, + &stamp_delta, &pg_sum_deltas); +} + +/** + * Update a given pool's deltas + * + * @param cct Ceph Context + * @param ts Timestamp for the stats being delta'ed + * @param pool Pool's id + * @param old_pool_sum Previous stats sum + */ +void PGMap::update_one_pool_delta(CephContext *cct, + const utime_t ts, + const uint64_t pool, + const pool_stat_t& old_pool_sum) +{ + if (per_pool_sum_deltas.count(pool) == 0) { + assert(per_pool_sum_deltas_stamps.count(pool) == 0); + assert(per_pool_sum_delta.count(pool) == 0); + } + + pair<pool_stat_t,utime_t>& sum_delta = per_pool_sum_delta[pool]; + + update_delta(cct, ts, old_pool_sum, &sum_delta.second, pg_pool_sum[pool], + &sum_delta.first, &per_pool_sum_deltas_stamps[pool], + &per_pool_sum_deltas[pool]); +} + +/** + * Update pools' deltas + * + * @param cct CephContext + * @param ts Timestamp for the stats being delta'ed + * @param pg_pool_sum_old Map of pool stats for delta calcs. + */ +void PGMap::update_pool_deltas(CephContext *cct, const utime_t ts, + const hash_map<uint64_t,pool_stat_t>& pg_pool_sum_old) +{ + for (hash_map<uint64_t,pool_stat_t>::const_iterator it = pg_pool_sum_old.begin(); + it != pg_pool_sum_old.end(); ++it) { + update_one_pool_delta(cct, ts, it->first, it->second); } } @@ -911,7 +1092,7 @@ void PGMap::print_summary(Formatter *f, ostream *out) const } std::stringstream ssr; - recovery_summary(f, &ssr); + overall_recovery_summary(f, &ssr); if (!f && ssr.str().length()) *out << " " << ssr.str() << "\n"; ssr.clear(); @@ -920,43 +1101,17 @@ void PGMap::print_summary(Formatter *f, ostream *out) const if (!f) *out << ss.str(); // pgs by state - recovery_rate_summary(f, &ssr); + overall_recovery_rate_summary(f, &ssr); if (!f && ssr.str().length()) *out << "recovery io " << ssr.str() << "\n"; - // make non-negative; we can get negative values if osds send - // uncommitted stats and then "go backward" or if they are just - // buggy/wrong. - pool_stat_t pos_delta = pg_sum_delta; - pos_delta.floor(0); - if (pos_delta.stats.sum.num_rd || - pos_delta.stats.sum.num_wr) { - if (!f) - *out << " client io "; - if (pos_delta.stats.sum.num_rd) { - int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta; - if (f) { - f->dump_int("read_bytes_sec", rd); - } else { - *out << pretty_si_t(rd) << "B/s rd, "; - } - } - if (pos_delta.stats.sum.num_wr) { - int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta; - if (f) { - f->dump_int("write_bytes_sec", wr); - } else { - *out << pretty_si_t(wr) << "B/s wr, "; - } - } - int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta; - if (f) { - f->dump_int("op_per_sec", iops); - } else { - *out << pretty_si_t(iops) << "op/s"; - *out << "\n"; - } - } + ssr.clear(); + ssr.str(""); + + overall_client_io_rate_summary(f, &ssr); + if (!f && ssr.str().length()) + *out << " client io " << ssr.str() << "\n"; + } @@ -1002,12 +1157,12 @@ void PGMap::print_oneline_summary(ostream *out) const } std::stringstream ssr; - recovery_summary(NULL, &ssr); + overall_recovery_summary(NULL, &ssr); if (ssr.str().length()) *out << "; " << ssr.str(); ssr.clear(); ssr.str(""); - recovery_rate_summary(NULL, &ssr); + overall_recovery_rate_summary(NULL, &ssr); if (ssr.str().length()) *out << "; " << ssr.str() << " recovering"; } diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index 7a202fc0006..c8ce7fd973e 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -109,13 +109,51 @@ public: utime_t stamp; // recent deltas, and summation + /** + * keep track of last deltas for each pool, calculated using + * @p pg_pool_sum as baseline. + */ + hash_map<uint64_t, list< pair<pool_stat_t, utime_t> > > per_pool_sum_deltas; + /** + * keep track of per-pool timestamp deltas, according to last update on + * each pool. + */ + hash_map<uint64_t, utime_t> per_pool_sum_deltas_stamps; + /** + * keep track of sum deltas, per-pool, taking into account any previous + * deltas existing in @p per_pool_sum_deltas. The utime_t as second member + * of the pair is the timestamp refering to the last update (i.e., the first + * member of the pair) for a given pool. + */ + hash_map<uint64_t, pair<pool_stat_t,utime_t> > per_pool_sum_delta; + list< pair<pool_stat_t, utime_t> > pg_sum_deltas; pool_stat_t pg_sum_delta; utime_t stamp_delta; - void update_delta(CephContext *cct, utime_t inc_stamp, pool_stat_t& pg_sum_old); + void update_global_delta(CephContext *cct, + const utime_t ts, const pool_stat_t& pg_sum_old); + void update_pool_deltas(CephContext *cct, + const utime_t ts, + const hash_map<uint64_t, pool_stat_t>& pg_pool_sum_old); void clear_delta(); + private: + void update_delta(CephContext *cct, + const utime_t ts, + const pool_stat_t& old_pool_sum, + utime_t *last_ts, + const pool_stat_t& current_pool_sum, + pool_stat_t *result_pool_delta, + utime_t *result_ts_delta, + list<pair<pool_stat_t,utime_t> > *delta_avg_list); + + void update_one_pool_delta(CephContext *cct, + const utime_t ts, + const uint64_t pool, + const pool_stat_t& old_pool_sum); + public: + set<pg_t> creating_pgs; // lru: front = new additions, back = recently pinged map<int,set<pg_t> > creating_pgs_by_osd; @@ -205,8 +243,37 @@ public: void dump_osd_perf_stats(Formatter *f) const; void print_osd_perf_stats(std::ostream *ss) const; - void recovery_summary(Formatter *f, ostream *out) const; - void recovery_rate_summary(Formatter *f, ostream *out) const; + void recovery_summary(Formatter *f, ostream *out, + pool_stat_t delta_sum) const; + void overall_recovery_summary(Formatter *f, ostream *out) const; + void pool_recovery_summary(Formatter *f, ostream *out, + uint64_t poolid) const; + void recovery_rate_summary(Formatter *f, ostream *out, + pool_stat_t delta_sum, + utime_t delta_stamp) const; + void overall_recovery_rate_summary(Formatter *f, ostream *out) const; + void pool_recovery_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const; + /** + * Obtain a formatted/plain output for client I/O, source from stats for a + * given @p delta_sum pool over a given @p delta_stamp period of time. + */ + void client_io_rate_summary(Formatter *f, ostream *out, + pool_stat_t delta_sum, + utime_t delta_stamp) const; + /** + * Obtain a formatted/plain output for the overall client I/O, which is + * calculated resorting to @p pg_sum_delta and @p stamp_delta. + */ + void overall_client_io_rate_summary(Formatter *f, ostream *out) const; + /** + * Obtain a formatted/plain output for client I/O over a given pool + * with id @p pool_id. We will then obtain pool-specific data + * from @p per_pool_sum_delta. + */ + void pool_client_io_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const; + void print_summary(Formatter *f, ostream *out) const; void print_oneline_summary(ostream *out) const; diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 0644922ddb4..c14872d87ef 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -141,6 +141,31 @@ void PGMonitor::tick() } } + /* If we have deltas for pools, run through pgmap's 'per_pool_sum_delta' and + * clear any deltas that are old enough. + * + * Note that 'per_pool_sum_delta' keeps a pool id as key, and a pair containing + * the calc'ed stats delta and an absolute timestamp from when those stats were + * obtained -- the timestamp IS NOT a delta itself. + */ + if (!pg_map.per_pool_sum_deltas.empty()) { + hash_map<uint64_t,pair<pool_stat_t,utime_t> >::iterator it; + for (it = pg_map.per_pool_sum_delta.begin(); + it != pg_map.per_pool_sum_delta.end(); ) { + utime_t age = ceph_clock_now(g_ceph_context) - it->second.second; + if (age > 2*g_conf->mon_delta_reset_interval) { + dout(10) << " clearing pg_map delta for pool " << it->first + << " (" << age << " > " << g_conf->mon_delta_reset_interval + << " seconds old)" << dendl; + pg_map.per_pool_sum_deltas.erase(it->first); + pg_map.per_pool_sum_deltas_stamps.erase(it->first); + pg_map.per_pool_sum_delta.erase((it++)->first); + } else { + ++it; + } + } + } + dout(10) << pg_map << dendl; } @@ -401,6 +426,7 @@ void PGMonitor::apply_pgmap_delta(bufferlist& bl) } pool_stat_t pg_sum_old = pg_map.pg_sum; + hash_map<uint64_t, pool_stat_t> pg_pool_sum_old; // pgs bufferlist::iterator p = dirty_pgs.begin(); @@ -410,6 +436,10 @@ void PGMonitor::apply_pgmap_delta(bufferlist& bl) dout(20) << " refreshing pg " << pgid << dendl; bufferlist bl; int r = mon->store->get(pgmap_pg_prefix, stringify(pgid), bl); + + if (pg_pool_sum_old.count(pgid.pool()) == 0) + pg_pool_sum_old[pgid.pool()] = pg_map.pg_pool_sum[pgid.pool()]; + if (r >= 0) { pg_map.update_pg(pgid, bl); } else { @@ -432,7 +462,8 @@ void PGMonitor::apply_pgmap_delta(bufferlist& bl) } } - pg_map.update_delta(g_ceph_context, inc_stamp, pg_sum_old); + pg_map.update_global_delta(g_ceph_context, inc_stamp, pg_sum_old); + pg_map.update_pool_deltas(g_ceph_context, inc_stamp, pg_pool_sum_old); // ok, we're now on the new version pg_map.version = v; @@ -1831,7 +1862,7 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary, // recovery stringstream rss; - pg_map.recovery_summary(NULL, &rss); + pg_map.overall_recovery_summary(NULL, &rss); if (!rss.str().empty()) { summary.push_back(make_pair(HEALTH_WARN, "recovery " + rss.str())); if (detail) @@ -1880,7 +1911,9 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary, detail->push_back(make_pair(HEALTH_WARN, ss.str())); } int average_objects_per_pg = pg_map.pg_sum.stats.sum.num_objects / pg_map.pg_stat.size(); - if (average_objects_per_pg > 0) { + if (average_objects_per_pg > 0 && + pg_map.pg_sum.stats.sum.num_objects >= g_conf->mon_pg_warn_min_objects && + p->second.stats.sum.num_objects >= g_conf->mon_pg_warn_min_pool_objects) { int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num(); float ratio = (float)objects_per_pg / (float)average_objects_per_pg; if (g_conf->mon_pg_warn_max_object_skew > 0 && diff --git a/src/os/LevelDBStore.h b/src/os/LevelDBStore.h index 89718ce1987..bc5b612a97a 100644 --- a/src/os/LevelDBStore.h +++ b/src/os/LevelDBStore.h @@ -329,13 +329,15 @@ public: string fpath = path + '/' + n; struct stat s; int err = stat(fpath.c_str(), &s); + if (err < 0) + err = -errno; // we may race against leveldb while reading files; this should only // happen when those files are being updated, data is being shuffled // and files get removed, in which case there's not much of a problem // as we'll get to them next time around. if ((err < 0) && (err != -ENOENT)) { lderr(cct) << __func__ << " error obtaining stats for " << fpath - << ": " << cpp_strerror(errno) << dendl; + << ": " << cpp_strerror(err) << dendl; goto err; } diff --git a/src/rgw/rgw_acl.cc b/src/rgw/rgw_acl.cc index 3f99d72cd5b..02504524847 100644 --- a/src/rgw/rgw_acl.cc +++ b/src/rgw/rgw_acl.cc @@ -79,7 +79,7 @@ int RGWAccessControlPolicy::get_perm(string& id, int perm_mask) { if ((perm & perm_mask) != perm_mask) { perm |= acl.get_group_perm(ACL_GROUP_ALL_USERS, perm_mask); - if (compare_group_name(id, ACL_GROUP_ALL_USERS) != 0) { + if (!compare_group_name(id, ACL_GROUP_ALL_USERS)) { /* this is not the anonymous user */ perm |= acl.get_group_perm(ACL_GROUP_AUTHENTICATED_USERS, perm_mask); } diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc index acaa5deffee..5fbecf88cab 100644 --- a/src/rgw/rgw_main.cc +++ b/src/rgw/rgw_main.cc @@ -468,7 +468,7 @@ int main(int argc, const char **argv) /* alternative default for module */ vector<const char *> def_args; - def_args.push_back("--debug-rgw=20"); + def_args.push_back("--debug-rgw=1/5"); def_args.push_back("--keyring=$rgw_data/keyring"); def_args.push_back("--log-file=/var/log/radosgw/$cluster-$name"); diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 9f0a900f3d3..20ca8d8eb8f 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -82,18 +82,26 @@ void RGWDefaultRegionInfo::decode_json(JSONObj *obj) { JSONDecoder::decode_json("default_region", default_region, obj); } -string RGWRegion::get_pool_name(CephContext *cct) +int RGWRegion::get_pool_name(CephContext *cct, string *pool_name) { - string pool_name = cct->_conf->rgw_region_root_pool; - if (pool_name.empty()) { - pool_name = RGW_DEFAULT_REGION_ROOT_POOL; + *pool_name = cct->_conf->rgw_region_root_pool; + if (pool_name->empty()) { + *pool_name = RGW_DEFAULT_REGION_ROOT_POOL; + } else if ((*pool_name)[0] != '.') { + derr << "ERROR: region root pool name must start with a period" << dendl; + return -EINVAL; } - return pool_name; + return 0; } int RGWRegion::read_default(RGWDefaultRegionInfo& default_info) { - string pool_name = get_pool_name(cct); + string pool_name; + + int ret = get_pool_name(cct, &pool_name); + if (ret < 0) { + return ret; + } string oid = cct->_conf->rgw_default_region_info_oid; if (oid.empty()) { @@ -102,7 +110,7 @@ int RGWRegion::read_default(RGWDefaultRegionInfo& default_info) rgw_bucket pool(pool_name.c_str()); bufferlist bl; - int ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL); + ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL); if (ret < 0) return ret; @@ -121,7 +129,10 @@ int RGWRegion::read_default(RGWDefaultRegionInfo& default_info) int RGWRegion::set_as_default() { - string pool_name = get_pool_name(cct); + string pool_name; + int ret = get_pool_name(cct, &pool_name); + if (ret < 0) + return ret; string oid = cct->_conf->rgw_default_region_info_oid; if (oid.empty()) { @@ -136,7 +147,7 @@ int RGWRegion::set_as_default() ::encode(default_info, bl); - int ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), false, NULL, 0, NULL); + ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), false, NULL, 0, NULL); if (ret < 0) return ret; @@ -185,7 +196,11 @@ int RGWRegion::init(CephContext *_cct, RGWRados *_store, bool setup_region) int RGWRegion::read_info(const string& region_name) { - string pool_name = get_pool_name(cct); + string pool_name; + int ret = get_pool_name(cct, &pool_name); + if (ret < 0) + return ret; + rgw_bucket pool(pool_name.c_str()); bufferlist bl; @@ -193,7 +208,7 @@ int RGWRegion::read_info(const string& region_name) string oid = region_info_oid_prefix + name; - int ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL); + ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL); if (ret < 0) { lderr(cct) << "failed reading region info from " << pool << ":" << oid << ": " << cpp_strerror(-ret) << dendl; return ret; @@ -246,7 +261,10 @@ int RGWRegion::create_default() int RGWRegion::store_info(bool exclusive) { - string pool_name = get_pool_name(cct); + string pool_name; + int ret = get_pool_name(cct, &pool_name); + if (ret < 0) + return ret; rgw_bucket pool(pool_name.c_str()); @@ -254,7 +272,7 @@ int RGWRegion::store_info(bool exclusive) bufferlist bl; ::encode(*this, bl); - int ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), exclusive, NULL, 0, NULL); + ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), exclusive, NULL, 0, NULL); return ret; } @@ -293,13 +311,17 @@ void RGWZoneParams::init_default(RGWRados *store) } } -string RGWZoneParams::get_pool_name(CephContext *cct) +int RGWZoneParams::get_pool_name(CephContext *cct, string *pool_name) { - string pool_name = cct->_conf->rgw_zone_root_pool; - if (pool_name.empty()) { - pool_name = RGW_DEFAULT_ZONE_ROOT_POOL; + *pool_name = cct->_conf->rgw_zone_root_pool; + if (pool_name->empty()) { + *pool_name = RGW_DEFAULT_ZONE_ROOT_POOL; + } else if ((*pool_name)[0] != '.') { + derr << "ERROR: zone root pool name must start with a period" << dendl; + return -EINVAL; } - return pool_name; + + return 0; } void RGWZoneParams::init_name(CephContext *cct, RGWRegion& region) @@ -319,13 +341,16 @@ int RGWZoneParams::init(CephContext *cct, RGWRados *store, RGWRegion& region) { init_name(cct, region); - string pool_name = get_pool_name(cct); + string pool_name; + int ret = get_pool_name(cct, &pool_name); + if (ret < 0) + return ret; rgw_bucket pool(pool_name.c_str()); bufferlist bl; string oid = zone_info_oid_prefix + name; - int ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL); + ret = rgw_get_system_obj(store, NULL, pool, oid, bl, NULL, NULL); if (ret < 0) return ret; @@ -344,14 +369,17 @@ int RGWZoneParams::store_info(CephContext *cct, RGWRados *store, RGWRegion& regi { init_name(cct, region); - string pool_name = get_pool_name(cct); + string pool_name; + int ret = get_pool_name(cct, &pool_name); + if (ret < 0) + return ret; rgw_bucket pool(pool_name.c_str()); string oid = zone_info_oid_prefix + name; bufferlist bl; ::encode(*this, bl); - int ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), false, NULL, 0, NULL); + ret = rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), false, NULL, 0, NULL); return ret; } @@ -1032,14 +1060,20 @@ int RGWRados::list_raw_prefixed_objs(string pool_name, const string& prefix, lis int RGWRados::list_regions(list<string>& regions) { - string pool_name = RGWRegion::get_pool_name(cct); + string pool_name; + int ret = RGWRegion::get_pool_name(cct, &pool_name); + if (ret < 0) + return ret; return list_raw_prefixed_objs(pool_name, region_info_oid_prefix, regions); } int RGWRados::list_zones(list<string>& zones) { - string pool_name = RGWZoneParams::get_pool_name(cct); + string pool_name; + int ret = RGWZoneParams::get_pool_name(cct, &pool_name); + if (ret < 0) + return ret; return list_raw_prefixed_objs(pool_name, zone_info_oid_prefix, zones); } diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 52b898123d4..b37652d9f3f 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -433,7 +433,7 @@ struct RGWZoneParams { map<string, RGWZonePlacementInfo> placement_pools; - static string get_pool_name(CephContext *cct); + static int get_pool_name(CephContext *cct, string *pool_name); void init_name(CephContext *cct, RGWRegion& region); int init(CephContext *cct, RGWRados *store, RGWRegion& region); void init_default(RGWRados *store); @@ -622,7 +622,7 @@ struct RGWRegion { int set_as_default(); int equals(const string& other_region); - static string get_pool_name(CephContext *cct); + static int get_pool_name(CephContext *cct, string *pool_name); void dump(Formatter *f) const; void decode_json(JSONObj *obj); diff --git a/src/test/Makefile.am b/src/test/Makefile.am index 59b4d89e930..84a228f1d4b 100644 --- a/src/test/Makefile.am +++ b/src/test/Makefile.am @@ -856,11 +856,6 @@ ceph_test_keyvaluedb_iterators_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL) ceph_test_keyvaluedb_iterators_CXXFLAGS = $(UNITTEST_CXXFLAGS) bin_DEBUGPROGRAMS += ceph_test_keyvaluedb_iterators -ceph_test_store_tool_SOURCES = test/ObjectMap/test_store_tool/test_store_tool.cc -ceph_test_store_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL) -ceph_test_store_tool_CXXFLAGS = $(UNITTEST_CXXFLAGS) -bin_DEBUGPROGRAMS += ceph_test_store_tool - ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc bin_DEBUGPROGRAMS += ceph_test_cfuse_cache_invalidate diff --git a/src/test/filestore/run_seed_to.sh b/src/test/filestore/run_seed_to.sh index fdf56141e12..d5bb671138c 100755 --- a/src/test/filestore/run_seed_to.sh +++ b/src/test/filestore/run_seed_to.sh @@ -246,13 +246,13 @@ do do_rm $tmp_name_a $tmp_name_a.fail $tmp_name_a.recover $v ceph_test_filestore_idempotent_sequence run-sequence-to $to \ $tmp_name_a $tmp_name_a/journal \ - --filestore-xattr-use-omap --test-seed $seed --osd-journal-size 100 \ + --test-seed $seed --osd-journal-size 100 \ --filestore-kill-at $killat $tmp_opts_a \ --log-file $tmp_name_a.fail --debug-filestore 20 || true stop_at=`ceph_test_filestore_idempotent_sequence get-last-op \ $tmp_name_a $tmp_name_a/journal \ - --filestore-xattr-use-omap --log-file $tmp_name_a.recover \ + --log-file $tmp_name_a.recover \ --debug-filestore 20 --debug-journal 20` if [[ "`expr $stop_at - $stop_at 2>/dev/null`" != "0" ]]; then @@ -265,12 +265,11 @@ do do_rm $tmp_name_b $tmp_name_b.clean $v ceph_test_filestore_idempotent_sequence run-sequence-to \ $stop_at $tmp_name_b $tmp_name_b/journal \ - --filestore-xattr-use-omap --test-seed $seed --osd-journal-size 100 \ + --test-seed $seed --osd-journal-size 100 \ --log-file $tmp_name_b.clean --debug-filestore 20 $tmp_opts_b if $v ceph_test_filestore_idempotent_sequence diff \ - $tmp_name_a $tmp_name_a/journal $tmp_name_b $tmp_name_b/journal \ - --filestore-xattr-use-omap; then + $tmp_name_a $tmp_name_a/journal $tmp_name_b $tmp_name_b/journal ; then echo OK else echo "FAIL" diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc index 7158f50a74a..842f9d2bca3 100644 --- a/src/test/osd/TestRados.cc +++ b/src/test/osd/TestRados.cc @@ -111,22 +111,23 @@ private: return new SnapCreateOp(m_op, &context, m_stats); case TEST_OP_SNAP_REMOVE: - if (context.snaps.empty()) { + if (context.snaps.size() <= context.snaps_in_use.size()) { return NULL; - } else { + } + while (true) { int snap = rand_choose(context.snaps)->first; + if (context.snaps_in_use.count(snap)) + continue; // in use; try again! cout << "snap_remove snap " << snap << std::endl; return new SnapRemoveOp(m_op, &context, snap, m_stats); } case TEST_OP_ROLLBACK: - if (context.snaps.size() <= context.snaps_in_use.size()) { + if (context.snaps.empty()) { return NULL; } - while (true) { + { int snap = rand_choose(context.snaps)->first; - if (context.snaps_in_use.count(snap)) - continue; // in use; try again! string oid = *(rand_choose(context.oid_not_in_use)); cout << "rollback oid " << oid << " to " << snap << std::endl; return new RollbackOp(m_op, &context, oid, snap); diff --git a/src/tools/Makefile.am b/src/tools/Makefile.am index 4b8da77951a..89417014dd4 100644 --- a/src/tools/Makefile.am +++ b/src/tools/Makefile.am @@ -6,6 +6,12 @@ ceph_monstore_tool_SOURCES = tools/ceph-monstore-tool.cc ceph_monstore_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL) -lboost_program_options bin_DEBUGPROGRAMS += ceph-monstore-tool +ceph_kvstore_tool_SOURCES = tools/ceph-kvstore-tool.cc +ceph_kvstore_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL) +ceph_kvstore_tool_CXXFLAGS = $(UNITTEST_CXXFLAGS) +bin_DEBUGPROGRAMS += ceph-kvstore-tool + + ceph_filestore_dump_SOURCES = tools/ceph-filestore-dump.cc ceph_filestore_dump_LDADD = $(LIBOSD) $(LIBOS) $(CEPH_GLOBAL) -lboost_program_options if LINUX diff --git a/src/test/ObjectMap/test_store_tool/test_store_tool.cc b/src/tools/ceph-kvstore-tool.cc index 8fcf3f30e82..e07391d5c51 100644 --- a/src/test/ObjectMap/test_store_tool/test_store_tool.cc +++ b/src/tools/ceph-kvstore-tool.cc @@ -25,21 +25,25 @@ #include "common/safe_io.h" #include "common/config.h" #include "common/strtol.h" +#include "include/stringify.h" using namespace std; class StoreTool { boost::scoped_ptr<KeyValueDB> db; + string store_path; public: - StoreTool(const string &path) { - LevelDBStore *db_ptr = new LevelDBStore(g_ceph_context, path); + StoreTool(const string &path) : store_path(path) { + LevelDBStore *db_ptr = new LevelDBStore(g_ceph_context, store_path); assert(!db_ptr->open(std::cerr)); db.reset(db_ptr); } - void list(const string &prefix, const bool do_crc) { + uint32_t traverse(const string &prefix, + const bool do_crc, + ostream *out) { KeyValueDB::WholeSpaceIterator iter = db->get_iterator(); if (prefix.empty()) @@ -47,18 +51,36 @@ class StoreTool else iter->seek_to_first(prefix); + uint32_t crc = -1; + while (iter->valid()) { pair<string,string> rk = iter->raw_key(); if (!prefix.empty() && (rk.first != prefix)) - break; + break; - std::cout << rk.first << ":" << rk.second; + if (out) + *out << rk.first << ":" << rk.second; if (do_crc) { - std::cout << " (" << iter->value().crc32c(0) << ")"; + bufferlist bl; + bl.append(rk.first); + bl.append(rk.second); + bl.append(iter->value()); + + crc = bl.crc32c(crc); + if (out) { + *out << " (" << bl.crc32c(0) << ")"; + } } - std::cout << std::endl; + if (out) + *out << std::endl; iter->next(); } + + return crc; + } + + void list(const string &prefix, const bool do_crc) { + traverse(prefix, do_crc, &std::cout); } bool exists(const string &prefix) { @@ -118,6 +140,70 @@ class StoreTool return (ret == 0); } + + int copy_store_to(const string &other_path, const int num_keys_per_tx) { + + if (num_keys_per_tx <= 0) { + std::cerr << "must specify a number of keys/tx > 0" << std::endl; + return -EINVAL; + } + + // open or create a leveldb store at @p other_path + LevelDBStore other(g_ceph_context, other_path); + int err = other.create_and_open(std::cerr); + if (err < 0) + return err; + + KeyValueDB::WholeSpaceIterator it = db->get_iterator(); + it->seek_to_first(); + uint64_t total_keys = 0; + uint64_t total_size = 0; + uint64_t total_txs = 0; + + utime_t started_at = ceph_clock_now(g_ceph_context); + + do { + int num_keys = 0; + + KeyValueDB::Transaction tx = other.get_transaction(); + + + while (it->valid() && num_keys < num_keys_per_tx) { + pair<string,string> k = it->raw_key(); + bufferlist v = it->value(); + tx->set(k.first, k.second, v); + + num_keys ++; + total_size += v.length(); + + it->next(); + } + + total_txs ++; + total_keys += num_keys; + + if (num_keys > 0) + other.submit_transaction_sync(tx); + + utime_t cur_duration = ceph_clock_now(g_ceph_context) - started_at; + std::cout << "ts = " << cur_duration << "s, copied " << total_keys + << " keys so far (" << stringify(si_t(total_size)) << ")" + << std::endl; + + } while (it->valid()); + + utime_t time_taken = ceph_clock_now(g_ceph_context) - started_at; + + std::cout << "summary:" << std::endl; + std::cout << " copied " << total_keys << " keys" << std::endl; + std::cout << " used " << total_txs << " transactions" << std::endl; + std::cout << " total size " << stringify(si_t(total_size)) << std::endl; + std::cout << " from '" << store_path << "' to '" << other_path << "'" + << std::endl; + std::cout << " duration " << time_taken << " seconds" << std::endl; + + return 0; + } }; void usage(const char *pname) @@ -132,6 +218,8 @@ void usage(const char *pname) << " crc <prefix> <key>\n" << " get-size\n" << " set <prefix> <key> [ver <N>|in <file>]\n" + << " store-copy <path> [num-keys-per-tx]\n" + << " store-crc <path>\n" << std::endl; } @@ -155,8 +243,6 @@ int main(int argc, const char *argv[]) string path(args[0]); string cmd(args[1]); - std::cout << "path: " << path << " cmd " << cmd << std::endl; - StoreTool st(path); if (cmd == "list" || cmd == "list-crc") { @@ -260,6 +346,30 @@ int main(int argc, const char *argv[]) << prefix << "," << key << ")" << std::endl; return 1; } + } else if (cmd == "store-copy") { + int num_keys_per_tx = 128; // magic number that just feels right. + if (argc < 4) { + usage(argv[0]); + return 1; + } else if (argc > 4) { + string err; + num_keys_per_tx = strict_strtol(argv[4], 10, &err); + if (!err.empty()) { + std::cerr << "invalid num_keys_per_tx: " << err << std::endl; + return 1; + } + } + + int ret = st.copy_store_to(argv[3], num_keys_per_tx); + if (ret < 0) { + std::cerr << "error copying store to path '" << argv[3] + << "': " << cpp_strerror(ret) << std::endl; + return 1; + } + + } else if (cmd == "store-crc") { + uint32_t crc = st.traverse(string(), true, NULL); + std::cout << "store at '" << path << "' crc " << crc << std::endl; } else { std::cerr << "Unrecognized command: " << cmd << std::endl; |