diff options
-rw-r--r-- | doc/control.rst | 5 | ||||
-rw-r--r-- | src/bash_completion/ceph | 2 | ||||
-rw-r--r-- | src/common/config_opts.h | 2 | ||||
-rw-r--r-- | src/messages/MOSDRepScrub.h | 19 | ||||
-rw-r--r-- | src/messages/MOSDScrub.h | 27 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 12 | ||||
-rw-r--r-- | src/mon/PGMap.cc | 3 | ||||
-rw-r--r-- | src/mon/PGMonitor.cc | 7 | ||||
-rw-r--r-- | src/osd/OSD.cc | 4 | ||||
-rw-r--r-- | src/osd/PG.cc | 83 | ||||
-rw-r--r-- | src/osd/PG.h | 15 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 7 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 46 | ||||
-rw-r--r-- | src/osd/osd_types.h | 17 | ||||
-rw-r--r-- | src/test/cli/ceph/help.t | 1 | ||||
-rw-r--r-- | src/tools/ceph.cc | 1 |
16 files changed, 204 insertions, 47 deletions
diff --git a/doc/control.rst b/doc/control.rst index 19c976e995a..0e4cbdeacbf 100644 --- a/doc/control.rst +++ b/doc/control.rst @@ -260,6 +260,11 @@ Get the value of a pool setting. Valid fields are: Sends a scrub command to osdN. To send the command to all osds, use ``*``. TODO: what does this actually do :: + $ ceph osd deep-scrub N + +Sends a deep scrub command to osdN. A deep scrub compares both the +metadata and the contents of objects between replicas. + $ ceph osd repair N Sends a repair command to osdN. To send the command to all osds, use ``*``. diff --git a/src/bash_completion/ceph b/src/bash_completion/ceph index cec2b852e5b..2ea53c60307 100644 --- a/src/bash_completion/ceph +++ b/src/bash_completion/ceph @@ -36,7 +36,7 @@ _ceph() return 0 ;; pg) - COMPREPLY=( $(compgen -W "stat dump getmap map send_pg_creates scrub repair" -- ${cur}) ) + COMPREPLY=( $(compgen -W "stat dump getmap map send_pg_creates scrub deep-scrub repair" -- ${cur}) ) return 0 ;; osd) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 2ad3b0a23ca..8e959f775ca 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -322,6 +322,8 @@ OPTION(osd_max_scrubs, OPT_INT, 1) OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5) OPTION(osd_scrub_min_interval, OPT_FLOAT, 300) OPTION(osd_scrub_max_interval, OPT_FLOAT, 60*60*24) // once a day +OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week +OPTION(osd_deep_scrub_stride, OPT_INT, 524288) OPTION(osd_auto_weight, OPT_BOOL, false) OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored OPTION(osd_check_for_log_corruption, OPT_BOOL, false) diff --git a/src/messages/MOSDRepScrub.h b/src/messages/MOSDRepScrub.h index 184d153bcc9..2d3a66d96af 100644 --- a/src/messages/MOSDRepScrub.h +++ b/src/messages/MOSDRepScrub.h @@ -24,7 +24,7 @@ struct MOSDRepScrub : public Message { - static const int HEAD_VERSION = 3; + static const int HEAD_VERSION = 4; static const int COMPAT_VERSION = 2; pg_t pgid; // PG to scrub @@ -34,6 +34,7 @@ struct MOSDRepScrub : public Message { bool chunky; // true for chunky scrubs hobject_t start; // lower bound of scrub, inclusive hobject_t end; // upper bound of scrub, exclusive + bool deep; // true if scrub should be deep MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION) { } MOSDRepScrub(pg_t pgid, eversion_t scrub_from, eversion_t scrub_to, @@ -43,17 +44,19 @@ struct MOSDRepScrub : public Message { scrub_from(scrub_from), scrub_to(scrub_to), map_epoch(map_epoch), - chunky(false) { } + chunky(false), + deep(false) { } MOSDRepScrub(pg_t pgid, eversion_t scrub_to, epoch_t map_epoch, - hobject_t start, hobject_t end) + hobject_t start, hobject_t end, bool deep) : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION), pgid(pgid), scrub_to(scrub_to), map_epoch(map_epoch), chunky(true), start(start), - end(end) { } + end(end), + deep(deep) { } private: @@ -66,6 +69,7 @@ public: out << pgid << ",from:" << scrub_from << ",to:" << scrub_to << ",epoch:" << map_epoch << ",start:" << start << ",end:" << end << ",chunky:" << chunky + << ",deep:" << deep << ",version:" << header.version; out << ")"; } @@ -78,6 +82,7 @@ public: ::encode(chunky, payload); ::encode(start, payload); ::encode(end, payload); + ::encode(deep, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); @@ -90,8 +95,14 @@ public: ::decode(chunky, p); ::decode(start, p); ::decode(end, p); + if (header.version >= 4) { + ::decode(deep, p); + } else { + deep = false; + } } else { // v2 scrub: non-chunky chunky = false; + deep = false; } } }; diff --git a/src/messages/MOSDScrub.h b/src/messages/MOSDScrub.h index e4c9bd15868..72661f89598 100644 --- a/src/messages/MOSDScrub.h +++ b/src/messages/MOSDScrub.h @@ -23,17 +23,22 @@ */ struct MOSDScrub : public Message { + + static const int HEAD_VERSION = 2; + static const int COMPAT_VERSION = 1; + uuid_d fsid; vector<pg_t> scrub_pgs; bool repair; + bool deep; - MOSDScrub() : Message(MSG_OSD_SCRUB) {} - MOSDScrub(const uuid_d& f, bool r) : - Message(MSG_OSD_SCRUB), - fsid(f), repair(r) {} - MOSDScrub(const uuid_d& f, vector<pg_t>& pgs, bool r) : - Message(MSG_OSD_SCRUB), - fsid(f), scrub_pgs(pgs), repair(r) {} + MOSDScrub() : Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION) {} + MOSDScrub(const uuid_d& f, bool r, bool d) : + Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION), + fsid(f), repair(r), deep(d) {} + MOSDScrub(const uuid_d& f, vector<pg_t>& pgs, bool r, bool d) : + Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION), + fsid(f), scrub_pgs(pgs), repair(r), deep(d) {} private: ~MOSDScrub() {} @@ -47,6 +52,8 @@ public: out << scrub_pgs; if (repair) out << " repair"; + if (deep) + out << " deep"; out << ")"; } @@ -54,12 +61,18 @@ public: ::encode(fsid, payload); ::encode(scrub_pgs, payload); ::encode(repair, payload); + ::encode(deep, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); ::decode(fsid, p); ::decode(scrub_pgs, p); ::decode(repair, p); + if (header.version >= 2) { + ::decode(deep, p); + } else { + deep = false; + } } }; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 3e7604f9ef3..adfe86a0e35 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1649,10 +1649,12 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) r = 0; } } - else if ((m->cmd[1] == "scrub" || m->cmd[1] == "repair")) { + else if ((m->cmd[1] == "scrub" || + m->cmd[1] == "deep-scrub" || + m->cmd[1] == "repair")) { if (m->cmd.size() <= 2) { r = -EINVAL; - ss << "usage: osd [scrub|repair] <who>"; + ss << "usage: osd [scrub|deep-scrub|repair] <who>"; goto out; } if (m->cmd[2] == "*") { @@ -1662,7 +1664,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) if (osdmap.is_up(i)) { ss << (c++ ? ",":"") << i; mon->try_send_message(new MOSDScrub(osdmap.get_fsid(), - m->cmd[1] == "repair"), + m->cmd[1] == "repair", + m->cmd[1] == "deep-scrub"), osdmap.get_inst(i)); } r = 0; @@ -1671,7 +1674,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) long osd = strtol(m->cmd[2].c_str(), 0, 10); if (osdmap.is_up(osd)) { mon->try_send_message(new MOSDScrub(osdmap.get_fsid(), - m->cmd[1] == "repair"), + m->cmd[1] == "repair", + m->cmd[1] == "deep-scrub"), osdmap.get_inst(osd)); r = 0; ss << "osd." << osd << " instructed to " << m->cmd[1]; diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index accc1b73a20..f056271e227 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -464,7 +464,7 @@ void PGMap::dump_osd_stats(Formatter *f) const void PGMap::dump_pg_stats_plain(ostream& ss, const hash_map<pg_t, pg_stat_t>& pg_stats) const { - ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp" << std::endl; + ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp\tlast_deep_scrub\tdeep_scrub_stamp" << std::endl; for (hash_map<pg_t, pg_stat_t>::const_iterator i = pg_stats.begin(); i != pg_stats.end(); ++i) { const pg_stat_t &st(i->second); @@ -484,6 +484,7 @@ void PGMap::dump_pg_stats_plain(ostream& ss, << "\t" << st.up << "\t" << st.acting << "\t" << st.last_scrub << "\t" << st.last_scrub_stamp + << "\t" << st.last_deep_scrub << "\t" << st.last_deep_scrub_stamp << std::endl; } } diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index d18aeaa226b..a052bb85bd4 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -997,7 +997,9 @@ bool PGMonitor::preprocess_command(MMonCommand *m) } else ss << "invalid pgid '" << m->cmd[2] << "'"; } - else if ((m->cmd[1] == "scrub" || m->cmd[1] == "repair") && m->cmd.size() == 3) { + else if ((m->cmd[1] == "scrub" || + m->cmd[1] == "deep-scrub" || + m->cmd[1] == "repair") && m->cmd.size() == 3) { pg_t pgid; r = -EINVAL; if (pgid.parse(m->cmd[2].c_str())) { @@ -1008,7 +1010,8 @@ bool PGMonitor::preprocess_command(MMonCommand *m) vector<pg_t> pgs(1); pgs[0] = pgid; mon->try_send_message(new MOSDScrub(mon->monmap->fsid, pgs, - m->cmd[1] == "repair"), + m->cmd[1] == "repair", + m->cmd[1] == "deep-scrub"), mon->osdmon()->osdmap.get_inst(osd)); ss << "instructing pg " << pgid << " on osd." << osd << " to " << m->cmd[1]; r = 0; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 271ec8f2d93..5add6750558 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3257,6 +3257,8 @@ void OSD::handle_scrub(MOSDScrub *m) if (pg->is_primary()) { if (m->repair) pg->state_set(PG_STATE_REPAIR); + if (m->deep) + pg->state_set(PG_STATE_DEEP_SCRUB); if (pg->queue_scrub()) { dout(10) << "queueing " << *pg << " for scrub" << dendl; } @@ -3273,6 +3275,8 @@ void OSD::handle_scrub(MOSDScrub *m) if (pg->is_primary()) { if (m->repair) pg->state_set(PG_STATE_REPAIR); + if (m->deep) + pg->state_set(PG_STATE_DEEP_SCRUB); if (pg->queue_scrub()) { dout(10) << "queueing " << *pg << " for scrub" << dendl; } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 3e8135b4750..ff8555efc94 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1926,6 +1926,8 @@ void PG::update_stats() info.stats.created = info.history.epoch_created; info.stats.last_scrub = info.history.last_scrub; info.stats.last_scrub_stamp = info.history.last_scrub_stamp; + info.stats.last_deep_scrub = info.history.last_deep_scrub; + info.stats.last_deep_scrub_stamp = info.history.last_deep_scrub_stamp; info.stats.last_epoch_clean = info.history.last_epoch_clean; utime_t now = ceph_clock_now(g_ceph_context); @@ -2712,6 +2714,11 @@ bool PG::sched_scrub() return true; } + if (ceph_clock_now(g_ceph_context) > info.history.last_deep_scrub_stamp + g_conf->osd_deep_scrub_interval) { + dout(10) << "sched_scrub: scrub will be deep" << dendl; + scrubber.deep = true; + } + bool ret = false; if (!scrubber.reserved) { assert(scrubber.reserved_peers.empty()); @@ -2804,9 +2811,10 @@ void PG::sub_op_scrub_map(OpRequestRef op) /* * pg lock may or may not be held */ -void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls) +void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep) { - dout(10) << "_scan_list scanning " << ls.size() << " objects" << dendl; + dout(10) << "_scan_list scanning " << ls.size() << " objects" + << (deep ? " deeply" : "") << dendl; int i = 0; for (vector<hobject_t>::iterator p = ls.begin(); p != ls.end(); @@ -2820,6 +2828,23 @@ void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls) o.size = st.st_size; assert(!o.negative); osd->store->getattrs(coll, poid, o.attrs); + + // calculate the CRC32 on deep scrubs + if (deep) { + bufferhash h; + bufferlist bl; + int r; + __u64 pos = 0; + while ( (r = osd->store->read(coll, poid, pos, + g_conf->osd_deep_scrub_stride, bl)) > 0) { + h << bl; + pos += bl.length(); + bl.clear(); + } + o.digest = h.digest(); + o.digest_present = true; + } + dout(25) << "_scan_list " << poid << dendl; } else { dout(25) << "_scan_list " << poid << " got " << r << ", skipping" << dendl; @@ -2840,13 +2865,15 @@ void PG::_request_scrub_map_classic(int replica, eversion_t version) } // send scrub v3 messages (chunky scrub) -void PG::_request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end) +void PG::_request_scrub_map(int replica, eversion_t version, + hobject_t start, hobject_t end, + bool deep) { assert(replica != osd->whoami); dout(10) << "scrub requesting scrubmap from osd." << replica << dendl; MOSDRepScrub *repscrubop = new MOSDRepScrub(info.pgid, version, get_osdmap()->get_epoch(), - start, end); + start, end, deep); osd->cluster_messenger->send_message(repscrubop, get_osdmap()->get_cluster_inst(replica)); } @@ -2977,7 +3004,8 @@ void PG::scrub_unreserve_replicas() * build a scrub map over a chunk without releasing the lock * only used by chunky scrub */ -int PG::build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end) +int PG::build_scrub_map_chunk(ScrubMap &map, + hobject_t start, hobject_t end, bool deep) { dout(10) << "build_scrub_map" << dendl; dout(20) << "scrub_map_chunk [" << start << "," << end << ")" << dendl; @@ -2992,7 +3020,7 @@ int PG::build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end) return ret; } - _scan_list(map, ls); + _scan_list(map, ls, deep); // pg attrs osd->store->collection_getattrs(coll, map.attrs); @@ -3025,7 +3053,7 @@ void PG::build_scrub_map(ScrubMap &map) vector<hobject_t> ls; osd->store->collection_list(coll, ls); - _scan_list(map, ls); + _scan_list(map, ls, false); lock(); if (epoch != info.history.same_interval_since) { @@ -3073,7 +3101,7 @@ void PG::build_inc_scrub_map(ScrubMap &map, eversion_t v) } } - _scan_list(map, ls); + _scan_list(map, ls, false); // pg attrs osd->store->collection_getattrs(coll, map.attrs); @@ -3155,7 +3183,7 @@ void PG::replica_scrub(MOSDRepScrub *msg) return; } - build_scrub_map_chunk(map, msg->start, msg->end); + build_scrub_map_chunk(map, msg->start, msg->end, msg->deep); } else { if (msg->scrub_from > eversion_t()) { @@ -3216,7 +3244,7 @@ void PG::scrub() return; } - // when the scrub is not active, we need to determine which type of scrub to do + // when we're starting a scrub, we need to determine which type of scrub to do if (!scrubber.active) { OSDMapRef curmap = osd->get_osdmap(); scrubber.is_chunky = true; @@ -3231,6 +3259,12 @@ void PG::scrub() } } + if (scrubber.is_chunky) { + scrubber.deep = state_test(PG_STATE_DEEP_SCRUB); + } else { + state_clear(PG_STATE_DEEP_SCRUB); + } + dout(10) << "starting a new " << (scrubber.is_chunky ? "chunky" : "classic") << " scrub" << dendl; } @@ -3548,7 +3582,7 @@ void PG::chunky_scrub() { // request maps from replicas for (unsigned i=1; i<acting.size(); i++) { _request_scrub_map(acting[i], scrubber.subset_last_update, - scrubber.start, scrubber.end); + scrubber.start, scrubber.end, scrubber.deep); scrubber.waiting_on_whom.insert(acting[i]); ++scrubber.waiting_on; } @@ -3580,7 +3614,9 @@ void PG::chunky_scrub() { assert(last_update_applied >= scrubber.subset_last_update); // build my own scrub map - ret = build_scrub_map_chunk(scrubber.primary_scrubmap, scrubber.start, scrubber.end); + ret = build_scrub_map_chunk(scrubber.primary_scrubmap, + scrubber.start, scrubber.end, + scrubber.deep); if (ret < 0) { dout(5) << "error building scrub map: " << ret << ", aborting" << dendl; scrub_clear_state(); @@ -3645,6 +3681,7 @@ void PG::scrub_clear_state() assert(_lock.is_locked()); state_clear(PG_STATE_SCRUBBING); state_clear(PG_STATE_REPAIR); + state_clear(PG_STATE_DEEP_SCRUB); update_stats(); // active -> nothing. @@ -3691,6 +3728,16 @@ bool PG::_compare_scrub_objects(ScrubMap::object &auth, errorstream << "size " << candidate.size << " != known size " << auth.size; } + if (auth.digest_present && candidate.digest_present) { + if (auth.digest != candidate.digest) { + if (!ok) + errorstream << ", "; + ok = false; + + errorstream << "digest " << candidate.digest + << " != known digest " << auth.digest; + } + } for (map<string,bufferptr>::const_iterator i = auth.attrs.begin(); i != auth.attrs.end(); i++) { @@ -3755,6 +3802,7 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps, j->second->objects[*k], ss)) { cur_inconsistent.insert(j->first); + ++scrubber.errors; errorstream << info.pgid << " osd." << acting[j->first] << ": soid " << *k << " " << ss.str() << std::endl; } @@ -3781,7 +3829,8 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps, void PG::scrub_compare_maps() { dout(10) << "scrub_compare_maps has maps, analyzing" << dendl; bool repair = state_test(PG_STATE_REPAIR); - const char *mode = repair ? "repair":"scrub"; + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); if (acting.size() > 1) { dout(10) << "scrub comparing replica scrub maps" << dendl; @@ -3837,6 +3886,7 @@ void PG::scrub_compare_maps() { &maps[i->second]->objects[i->first], acting[*j], acting[i->second]); + ++scrubber.fixed; } } @@ -3883,7 +3933,8 @@ void PG::scrub_finalize() { // the part that actually finalizes a scrub void PG::scrub_finish() { bool repair = state_test(PG_STATE_REPAIR); - const char *mode = repair ? "repair":"scrub"; + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); // type-specific finish (can tally more errors) _scrub_finish(); @@ -3911,6 +3962,10 @@ void PG::scrub_finish() { osd->unreg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp); info.history.last_scrub = info.last_update; info.history.last_scrub_stamp = ceph_clock_now(g_ceph_context); + if (scrubber.deep) { + info.history.last_deep_scrub = info.last_update; + info.history.last_deep_scrub_stamp = ceph_clock_now(g_ceph_context); + } osd->reg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp); { diff --git a/src/osd/PG.h b/src/osd/PG.h index 5c39cd89f10..0ad34514ea8 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -778,7 +778,8 @@ public: reserved(false), reserve_failed(false), block_writes(false), active(false), waiting_on(0), errors(0), fixed(0), active_rep_scrub(0), - finalizing(false), is_chunky(false), state(INACTIVE) + finalizing(false), is_chunky(false), state(INACTIVE), + deep(false) { } @@ -818,6 +819,9 @@ public: FINISH, } state; + // deep scrub + bool deep; + static const char *state_string(const PG::Scrubber::State& state) { const char *ret = NULL; switch( state ) @@ -855,6 +859,7 @@ public: subset_last_update = eversion_t(); errors = 0; fixed = 0; + deep = false; } } scrubber; @@ -878,10 +883,12 @@ public: void scrub_finish(); void scrub_clear_state(); bool scrub_gather_replica_maps(); - void _scan_list(ScrubMap &map, vector<hobject_t> &ls); + void _scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep); void _request_scrub_map_classic(int replica, eversion_t version); - void _request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end); - int build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end); + void _request_scrub_map(int replica, eversion_t version, + hobject_t start, hobject_t end, bool deep); + int build_scrub_map_chunk(ScrubMap &map, + hobject_t start, hobject_t end, bool deep); void build_scrub_map(ScrubMap &map); void build_inc_scrub_map(ScrubMap &map, eversion_t v); virtual void _scrub(ScrubMap &map) { } diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 01cab81e52b..332e0501b9e 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -6596,7 +6596,8 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) coll_t c(info.pgid); bool repair = state_test(PG_STATE_REPAIR); - const char *mode = repair ? "repair":"scrub"; + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); // traverse in reverse order. hobject_t head; @@ -6685,6 +6686,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) //bufferlist data; //osd->store->read(c, poid, 0, 0, data); //assert(data.length() == p->size); + // if (soid.snap == CEPH_NOSNAP) { if (!snapset.head_exists) { @@ -6737,7 +6739,8 @@ void ReplicatedPG::_scrub_clear_state() void ReplicatedPG::_scrub_finish() { bool repair = state_test(PG_STATE_REPAIR); - const char *mode = repair ? "repair":"scrub"; + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); dout(10) << mode << " got " << scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, " diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index cdbdfa95704..a981d02f900 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -406,6 +406,8 @@ std::string pg_state_string(int state) oss << "remapped+"; if (state & PG_STATE_SCRUBBING) oss << "scrubbing+"; + if (state & PG_STATE_DEEP_SCRUB) + oss << "deep+"; if (state & PG_STATE_SCRUBQ) oss << "scrubq+"; if (state & PG_STATE_INCONSISTENT) @@ -971,6 +973,8 @@ void pg_stat_t::dump(Formatter *f) const f->dump_unsigned("parent_split_bits", parent_split_bits); f->dump_stream("last_scrub") << last_scrub; f->dump_stream("last_scrub_stamp") << last_scrub_stamp; + f->dump_stream("last_deep_scrub") << last_deep_scrub; + f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp; f->dump_unsigned("log_size", log_size); f->dump_unsigned("ondisk_log_size", ondisk_log_size); stats.dump(f); @@ -986,7 +990,7 @@ void pg_stat_t::dump(Formatter *f) const void pg_stat_t::encode(bufferlist &bl) const { - ENCODE_START(9, 8, bl); + ENCODE_START(10, 8, bl); ::encode(version, bl); ::encode(reported, bl); ::encode(state, bl); @@ -1009,12 +1013,14 @@ void pg_stat_t::encode(bufferlist &bl) const ::encode(last_clean, bl); ::encode(last_unstale, bl); ::encode(mapping_epoch, bl); + ::encode(last_deep_scrub, bl); + ::encode(last_deep_scrub_stamp, bl); ENCODE_FINISH(bl); } void pg_stat_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(9, 8, 8, bl); + DECODE_START_LEGACY_COMPAT_LEN(10, 8, 8, bl); ::decode(version, bl); ::decode(reported, bl); ::decode(state, bl); @@ -1072,6 +1078,10 @@ void pg_stat_t::decode(bufferlist::iterator &bl) ::decode(last_clean, bl); ::decode(last_unstale, bl); ::decode(mapping_epoch, bl); + if (struct_v >= 10) { + ::decode(last_deep_scrub, bl); + ::decode(last_deep_scrub_stamp, bl); + } } } DECODE_FINISH(bl); @@ -1099,6 +1109,8 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o) a.parent_split_bits = 12; a.last_scrub = eversion_t(9, 10); a.last_scrub_stamp = utime_t(11, 12); + a.last_deep_scrub = eversion_t(13, 14); + a.last_deep_scrub_stamp = utime_t(15, 16); list<object_stat_collection_t*> l; object_stat_collection_t::generate_test_instances(l); a.stats = *l.back(); @@ -1177,7 +1189,7 @@ void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o) void pg_history_t::encode(bufferlist &bl) const { - ENCODE_START(4, 4, bl); + ENCODE_START(5, 4, bl); ::encode(epoch_created, bl); ::encode(last_epoch_started, bl); ::encode(last_epoch_clean, bl); @@ -1187,12 +1199,14 @@ void pg_history_t::encode(bufferlist &bl) const ::encode(same_primary_since, bl); ::encode(last_scrub, bl); ::encode(last_scrub_stamp, bl); + ::encode(last_deep_scrub, bl); + ::encode(last_deep_scrub_stamp, bl); ENCODE_FINISH(bl); } void pg_history_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl); + DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, bl); ::decode(epoch_created, bl); ::decode(last_epoch_started, bl); if (struct_v >= 3) @@ -1206,6 +1220,10 @@ void pg_history_t::decode(bufferlist::iterator &bl) if (struct_v >= 2) { ::decode(last_scrub, bl); ::decode(last_scrub_stamp, bl); + if (struct_v >= 5) { + ::decode(last_deep_scrub, bl); + ::decode(last_deep_scrub_stamp, bl); + } } DECODE_FINISH(bl); } @@ -1221,6 +1239,8 @@ void pg_history_t::dump(Formatter *f) const f->dump_int("same_primary_since", same_primary_since); f->dump_stream("last_scrub") << last_scrub; f->dump_stream("last_scrub_stamp") << last_scrub_stamp; + f->dump_stream("last_deep_scrub") << last_deep_scrub; + f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp; } void pg_history_t::generate_test_instances(list<pg_history_t*>& o) @@ -1235,7 +1255,9 @@ void pg_history_t::generate_test_instances(list<pg_history_t*>& o) o.back()->same_interval_since = 6; o.back()->same_primary_since = 7; o.back()->last_scrub = eversion_t(8, 9); - o.back()->last_scrub_stamp = utime_t(10, 11); + o.back()->last_scrub_stamp = utime_t(10, 11); + o.back()->last_deep_scrub = eversion_t(12, 13); + o.back()->last_deep_scrub_stamp = utime_t(14, 15); } @@ -2591,19 +2613,29 @@ void ScrubMap::generate_test_instances(list<ScrubMap*>& o) void ScrubMap::object::encode(bufferlist& bl) const { - ENCODE_START(2, 2, bl); + ENCODE_START(3, 2, bl); ::encode(size, bl); ::encode(negative, bl); ::encode(attrs, bl); + ::encode(digest, bl); + ::encode(digest_present, bl); ENCODE_FINISH(bl); } void ScrubMap::object::decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); ::decode(size, bl); ::decode(negative, bl); ::decode(attrs, bl); + if (struct_v >= 3) { + ::decode(digest, bl); + ::decode(digest_present, bl); + } + else { + digest = 0; + digest_present = false; + } DECODE_FINISH(bl); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 7cb590bdea6..b2b59b33f18 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -564,6 +564,7 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) { #define PG_STATE_INCOMPLETE (1<<16) // incomplete content, peering failed. #define PG_STATE_STALE (1<<17) // our state for this pg is stale, unknown. #define PG_STATE_REMAPPED (1<<18) // pg is explicitly remapped to different OSDs than CRUSH +#define PG_STATE_DEEP_SCRUB (1<<19) // deep scrub: check CRC32 on files std::string pg_state_string(int state); @@ -862,7 +863,9 @@ struct pg_stat_t { __u32 parent_split_bits; eversion_t last_scrub; + eversion_t last_deep_scrub; utime_t last_scrub_stamp; + utime_t last_deep_scrub_stamp; object_stat_collection_t stats; @@ -951,7 +954,9 @@ struct pg_history_t { epoch_t same_primary_since; // same primary at least back through this epoch. eversion_t last_scrub; + eversion_t last_deep_scrub; utime_t last_scrub_stamp; + utime_t last_deep_scrub_stamp; pg_history_t() : epoch_created(0), @@ -985,6 +990,14 @@ struct pg_history_t { last_scrub_stamp = other.last_scrub_stamp; modified = true; } + if (other.last_deep_scrub > last_deep_scrub) { + last_deep_scrub = other.last_deep_scrub; + modified = true; + } + if (other.last_deep_scrub_stamp > last_deep_scrub_stamp) { + last_deep_scrub_stamp = other.last_deep_scrub_stamp; + modified = true; + } return modified; } @@ -1777,8 +1790,10 @@ struct ScrubMap { uint64_t size; bool negative; map<string,bufferptr> attrs; + __u32 digest; + bool digest_present; - object(): size(0), negative(false) {} + object(): size(0), negative(false), digest(0), digest_present(false) {} void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); diff --git a/src/test/cli/ceph/help.t b/src/test/cli/ceph/help.t index 47e5b385008..bbd7ff76d9a 100644 --- a/src/test/cli/ceph/help.t +++ b/src/test/cli/ceph/help.t @@ -56,6 +56,7 @@ ceph osd pool rename <pool> <new pool name> ceph osd pool set <pool> <field> <value> ceph osd scrub <osd-id> + ceph osd deep-scrub <osd-id> ceph osd repair <osd-id> ceph osd tell N bench [bytes per write] [total bytes] diff --git a/src/tools/ceph.cc b/src/tools/ceph.cc index b82be10b9bf..278033c46cb 100644 --- a/src/tools/ceph.cc +++ b/src/tools/ceph.cc @@ -99,6 +99,7 @@ static void usage() cout << " ceph osd pool rename <pool> <new pool name>\n"; cout << " ceph osd pool set <pool> <field> <value>\n"; cout << " ceph osd scrub <osd-id>\n"; + cout << " ceph osd deep-scrub <osd-id>\n"; cout << " ceph osd repair <osd-id>\n"; cout << " ceph osd tell N bench [bytes per write] [total bytes]\n"; cout << "\n"; |