summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- doc/control.rst | 5
-rw-r--r-- src/bash_completion/ceph | 2
-rw-r--r-- src/common/config_opts.h | 2
-rw-r--r-- src/messages/MOSDRepScrub.h | 19
-rw-r--r-- src/messages/MOSDScrub.h | 27
-rw-r--r-- src/mon/OSDMonitor.cc | 12
-rw-r--r-- src/mon/PGMap.cc | 3
-rw-r--r-- src/mon/PGMonitor.cc | 7
-rw-r--r-- src/osd/OSD.cc | 4
-rw-r--r-- src/osd/PG.cc | 83
-rw-r--r-- src/osd/PG.h | 15
-rw-r--r-- src/osd/ReplicatedPG.cc | 7
-rw-r--r-- src/osd/osd_types.cc | 46
-rw-r--r-- src/osd/osd_types.h | 17
-rw-r--r-- src/test/cli/ceph/help.t | 1
-rw-r--r-- src/tools/ceph.cc | 1
16 files changed, 204 insertions, 47 deletions
diff --git a/doc/control.rst b/doc/control.rst
index 19c976e995a..0e4cbdeacbf 100644
--- a/doc/control.rst
+++ b/doc/control.rst
@@ -260,6 +260,11 @@ Get the value of a pool setting. Valid fields are:
Sends a scrub command to osdN. To send the command to all osds, use ``*``.
TODO: what does this actually do ::
+ $ ceph osd deep-scrub N
+
+Sends a deep scrub command to osdN. A deep scrub compares both the
+metadata and the contents of objects between replicas.
+
$ ceph osd repair N
Sends a repair command to osdN. To send the command to all osds, use ``*``.
diff --git a/src/bash_completion/ceph b/src/bash_completion/ceph
index cec2b852e5b..2ea53c60307 100644
--- a/src/bash_completion/ceph
+++ b/src/bash_completion/ceph
@@ -36,7 +36,7 @@ _ceph()
return 0
;;
pg)
- COMPREPLY=( $(compgen -W "stat dump getmap map send_pg_creates scrub repair" -- ${cur}) )
+ COMPREPLY=( $(compgen -W "stat dump getmap map send_pg_creates scrub deep-scrub repair" -- ${cur}) )
return 0
;;
osd)
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 2ad3b0a23ca..8e959f775ca 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -322,6 +322,8 @@ OPTION(osd_max_scrubs, OPT_INT, 1)
OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
OPTION(osd_scrub_min_interval, OPT_FLOAT, 300)
OPTION(osd_scrub_max_interval, OPT_FLOAT, 60*60*24) // once a day
+OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
+OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
OPTION(osd_auto_weight, OPT_BOOL, false)
OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
OPTION(osd_check_for_log_corruption, OPT_BOOL, false)
diff --git a/src/messages/MOSDRepScrub.h b/src/messages/MOSDRepScrub.h
index 184d153bcc9..2d3a66d96af 100644
--- a/src/messages/MOSDRepScrub.h
+++ b/src/messages/MOSDRepScrub.h
@@ -24,7 +24,7 @@
struct MOSDRepScrub : public Message {
- static const int HEAD_VERSION = 3;
+ static const int HEAD_VERSION = 4;
static const int COMPAT_VERSION = 2;
pg_t pgid; // PG to scrub
@@ -34,6 +34,7 @@ struct MOSDRepScrub : public Message {
bool chunky; // true for chunky scrubs
hobject_t start; // lower bound of scrub, inclusive
hobject_t end; // upper bound of scrub, exclusive
+ bool deep; // true if scrub should be deep
MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION) { }
MOSDRepScrub(pg_t pgid, eversion_t scrub_from, eversion_t scrub_to,
@@ -43,17 +44,19 @@ struct MOSDRepScrub : public Message {
scrub_from(scrub_from),
scrub_to(scrub_to),
map_epoch(map_epoch),
- chunky(false) { }
+ chunky(false),
+ deep(false) { }
MOSDRepScrub(pg_t pgid, eversion_t scrub_to, epoch_t map_epoch,
- hobject_t start, hobject_t end)
+ hobject_t start, hobject_t end, bool deep)
: Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
pgid(pgid),
scrub_to(scrub_to),
map_epoch(map_epoch),
chunky(true),
start(start),
- end(end) { }
+ end(end),
+ deep(deep) { }
private:
@@ -66,6 +69,7 @@ public:
out << pgid << ",from:" << scrub_from << ",to:" << scrub_to
<< ",epoch:" << map_epoch << ",start:" << start << ",end:" << end
<< ",chunky:" << chunky
+ << ",deep:" << deep
<< ",version:" << header.version;
out << ")";
}
@@ -78,6 +82,7 @@ public:
::encode(chunky, payload);
::encode(start, payload);
::encode(end, payload);
+ ::encode(deep, payload);
}
void decode_payload() {
bufferlist::iterator p = payload.begin();
@@ -90,8 +95,14 @@ public:
::decode(chunky, p);
::decode(start, p);
::decode(end, p);
+ if (header.version >= 4) {
+ ::decode(deep, p);
+ } else {
+ deep = false;
+ }
} else { // v2 scrub: non-chunky
chunky = false;
+ deep = false;
}
}
};
diff --git a/src/messages/MOSDScrub.h b/src/messages/MOSDScrub.h
index e4c9bd15868..72661f89598 100644
--- a/src/messages/MOSDScrub.h
+++ b/src/messages/MOSDScrub.h
@@ -23,17 +23,22 @@
*/
struct MOSDScrub : public Message {
+
+ static const int HEAD_VERSION = 2;
+ static const int COMPAT_VERSION = 1;
+
uuid_d fsid;
vector<pg_t> scrub_pgs;
bool repair;
+ bool deep;
- MOSDScrub() : Message(MSG_OSD_SCRUB) {}
- MOSDScrub(const uuid_d& f, bool r) :
- Message(MSG_OSD_SCRUB),
- fsid(f), repair(r) {}
- MOSDScrub(const uuid_d& f, vector<pg_t>& pgs, bool r) :
- Message(MSG_OSD_SCRUB),
- fsid(f), scrub_pgs(pgs), repair(r) {}
+ MOSDScrub() : Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION) {}
+ MOSDScrub(const uuid_d& f, bool r, bool d) :
+ Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION),
+ fsid(f), repair(r), deep(d) {}
+ MOSDScrub(const uuid_d& f, vector<pg_t>& pgs, bool r, bool d) :
+ Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION),
+ fsid(f), scrub_pgs(pgs), repair(r), deep(d) {}
private:
~MOSDScrub() {}
@@ -47,6 +52,8 @@ public:
out << scrub_pgs;
if (repair)
out << " repair";
+ if (deep)
+ out << " deep";
out << ")";
}
@@ -54,12 +61,18 @@ public:
::encode(fsid, payload);
::encode(scrub_pgs, payload);
::encode(repair, payload);
+ ::encode(deep, payload);
}
void decode_payload() {
bufferlist::iterator p = payload.begin();
::decode(fsid, p);
::decode(scrub_pgs, p);
::decode(repair, p);
+ if (header.version >= 2) {
+ ::decode(deep, p);
+ } else {
+ deep = false;
+ }
}
};
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 3e7604f9ef3..adfe86a0e35 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1649,10 +1649,12 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
r = 0;
}
}
- else if ((m->cmd[1] == "scrub" || m->cmd[1] == "repair")) {
+ else if ((m->cmd[1] == "scrub" ||
+ m->cmd[1] == "deep-scrub" ||
+ m->cmd[1] == "repair")) {
if (m->cmd.size() <= 2) {
r = -EINVAL;
- ss << "usage: osd [scrub|repair] <who>";
+ ss << "usage: osd [scrub|deep-scrub|repair] <who>";
goto out;
}
if (m->cmd[2] == "*") {
@@ -1662,7 +1664,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
if (osdmap.is_up(i)) {
ss << (c++ ? ",":"") << i;
mon->try_send_message(new MOSDScrub(osdmap.get_fsid(),
- m->cmd[1] == "repair"),
+ m->cmd[1] == "repair",
+ m->cmd[1] == "deep-scrub"),
osdmap.get_inst(i));
}
r = 0;
@@ -1671,7 +1674,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
long osd = strtol(m->cmd[2].c_str(), 0, 10);
if (osdmap.is_up(osd)) {
mon->try_send_message(new MOSDScrub(osdmap.get_fsid(),
- m->cmd[1] == "repair"),
+ m->cmd[1] == "repair",
+ m->cmd[1] == "deep-scrub"),
osdmap.get_inst(osd));
r = 0;
ss << "osd." << osd << " instructed to " << m->cmd[1];
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index accc1b73a20..f056271e227 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -464,7 +464,7 @@ void PGMap::dump_osd_stats(Formatter *f) const
void PGMap::dump_pg_stats_plain(ostream& ss,
const hash_map<pg_t, pg_stat_t>& pg_stats) const
{
- ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp" << std::endl;
+ ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp\tlast_deep_scrub\tdeep_scrub_stamp" << std::endl;
for (hash_map<pg_t, pg_stat_t>::const_iterator i = pg_stats.begin();
i != pg_stats.end(); ++i) {
const pg_stat_t &st(i->second);
@@ -484,6 +484,7 @@ void PGMap::dump_pg_stats_plain(ostream& ss,
<< "\t" << st.up
<< "\t" << st.acting
<< "\t" << st.last_scrub << "\t" << st.last_scrub_stamp
+ << "\t" << st.last_deep_scrub << "\t" << st.last_deep_scrub_stamp
<< std::endl;
}
}
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index d18aeaa226b..a052bb85bd4 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -997,7 +997,9 @@ bool PGMonitor::preprocess_command(MMonCommand *m)
} else
ss << "invalid pgid '" << m->cmd[2] << "'";
}
- else if ((m->cmd[1] == "scrub" || m->cmd[1] == "repair") && m->cmd.size() == 3) {
+ else if ((m->cmd[1] == "scrub" ||
+ m->cmd[1] == "deep-scrub" ||
+ m->cmd[1] == "repair") && m->cmd.size() == 3) {
pg_t pgid;
r = -EINVAL;
if (pgid.parse(m->cmd[2].c_str())) {
@@ -1008,7 +1010,8 @@ bool PGMonitor::preprocess_command(MMonCommand *m)
vector<pg_t> pgs(1);
pgs[0] = pgid;
mon->try_send_message(new MOSDScrub(mon->monmap->fsid, pgs,
- m->cmd[1] == "repair"),
+ m->cmd[1] == "repair",
+ m->cmd[1] == "deep-scrub"),
mon->osdmon()->osdmap.get_inst(osd));
ss << "instructing pg " << pgid << " on osd." << osd << " to " << m->cmd[1];
r = 0;
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 271ec8f2d93..5add6750558 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -3257,6 +3257,8 @@ void OSD::handle_scrub(MOSDScrub *m)
if (pg->is_primary()) {
if (m->repair)
pg->state_set(PG_STATE_REPAIR);
+ if (m->deep)
+ pg->state_set(PG_STATE_DEEP_SCRUB);
if (pg->queue_scrub()) {
dout(10) << "queueing " << *pg << " for scrub" << dendl;
}
@@ -3273,6 +3275,8 @@ void OSD::handle_scrub(MOSDScrub *m)
if (pg->is_primary()) {
if (m->repair)
pg->state_set(PG_STATE_REPAIR);
+ if (m->deep)
+ pg->state_set(PG_STATE_DEEP_SCRUB);
if (pg->queue_scrub()) {
dout(10) << "queueing " << *pg << " for scrub" << dendl;
}
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 3e8135b4750..ff8555efc94 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1926,6 +1926,8 @@ void PG::update_stats()
info.stats.created = info.history.epoch_created;
info.stats.last_scrub = info.history.last_scrub;
info.stats.last_scrub_stamp = info.history.last_scrub_stamp;
+ info.stats.last_deep_scrub = info.history.last_deep_scrub;
+ info.stats.last_deep_scrub_stamp = info.history.last_deep_scrub_stamp;
info.stats.last_epoch_clean = info.history.last_epoch_clean;
utime_t now = ceph_clock_now(g_ceph_context);
@@ -2712,6 +2714,11 @@ bool PG::sched_scrub()
return true;
}
+ if (ceph_clock_now(g_ceph_context) > info.history.last_deep_scrub_stamp + g_conf->osd_deep_scrub_interval) {
+ dout(10) << "sched_scrub: scrub will be deep" << dendl;
+ scrubber.deep = true;
+ }
+
bool ret = false;
if (!scrubber.reserved) {
assert(scrubber.reserved_peers.empty());
@@ -2804,9 +2811,10 @@ void PG::sub_op_scrub_map(OpRequestRef op)
/*
* pg lock may or may not be held
*/
-void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls)
+void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep)
{
- dout(10) << "_scan_list scanning " << ls.size() << " objects" << dendl;
+ dout(10) << "_scan_list scanning " << ls.size() << " objects"
+ << (deep ? " deeply" : "") << dendl;
int i = 0;
for (vector<hobject_t>::iterator p = ls.begin();
p != ls.end();
@@ -2820,6 +2828,23 @@ void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls)
o.size = st.st_size;
assert(!o.negative);
osd->store->getattrs(coll, poid, o.attrs);
+
+ // calculate the CRC32 on deep scrubs
+ if (deep) {
+ bufferhash h;
+ bufferlist bl;
+ int r;
+ __u64 pos = 0;
+ while ( (r = osd->store->read(coll, poid, pos,
+ g_conf->osd_deep_scrub_stride, bl)) > 0) {
+ h << bl;
+ pos += bl.length();
+ bl.clear();
+ }
+ o.digest = h.digest();
+ o.digest_present = true;
+ }
+
dout(25) << "_scan_list " << poid << dendl;
} else {
dout(25) << "_scan_list " << poid << " got " << r << ", skipping" << dendl;
@@ -2840,13 +2865,15 @@ void PG::_request_scrub_map_classic(int replica, eversion_t version)
}
// send scrub v3 messages (chunky scrub)
-void PG::_request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end)
+void PG::_request_scrub_map(int replica, eversion_t version,
+ hobject_t start, hobject_t end,
+ bool deep)
{
assert(replica != osd->whoami);
dout(10) << "scrub requesting scrubmap from osd." << replica << dendl;
MOSDRepScrub *repscrubop = new MOSDRepScrub(info.pgid, version,
get_osdmap()->get_epoch(),
- start, end);
+ start, end, deep);
osd->cluster_messenger->send_message(repscrubop,
get_osdmap()->get_cluster_inst(replica));
}
@@ -2977,7 +3004,8 @@ void PG::scrub_unreserve_replicas()
* build a scrub map over a chunk without releasing the lock
* only used by chunky scrub
*/
-int PG::build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end)
+int PG::build_scrub_map_chunk(ScrubMap &map,
+ hobject_t start, hobject_t end, bool deep)
{
dout(10) << "build_scrub_map" << dendl;
dout(20) << "scrub_map_chunk [" << start << "," << end << ")" << dendl;
@@ -2992,7 +3020,7 @@ int PG::build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end)
return ret;
}
- _scan_list(map, ls);
+ _scan_list(map, ls, deep);
// pg attrs
osd->store->collection_getattrs(coll, map.attrs);
@@ -3025,7 +3053,7 @@ void PG::build_scrub_map(ScrubMap &map)
vector<hobject_t> ls;
osd->store->collection_list(coll, ls);
- _scan_list(map, ls);
+ _scan_list(map, ls, false);
lock();
if (epoch != info.history.same_interval_since) {
@@ -3073,7 +3101,7 @@ void PG::build_inc_scrub_map(ScrubMap &map, eversion_t v)
}
}
- _scan_list(map, ls);
+ _scan_list(map, ls, false);
// pg attrs
osd->store->collection_getattrs(coll, map.attrs);
@@ -3155,7 +3183,7 @@ void PG::replica_scrub(MOSDRepScrub *msg)
return;
}
- build_scrub_map_chunk(map, msg->start, msg->end);
+ build_scrub_map_chunk(map, msg->start, msg->end, msg->deep);
} else {
if (msg->scrub_from > eversion_t()) {
@@ -3216,7 +3244,7 @@ void PG::scrub()
return;
}
- // when the scrub is not active, we need to determine which type of scrub to do
+ // when we're starting a scrub, we need to determine which type of scrub to do
if (!scrubber.active) {
OSDMapRef curmap = osd->get_osdmap();
scrubber.is_chunky = true;
@@ -3231,6 +3259,12 @@ void PG::scrub()
}
}
+ if (scrubber.is_chunky) {
+ scrubber.deep = state_test(PG_STATE_DEEP_SCRUB);
+ } else {
+ state_clear(PG_STATE_DEEP_SCRUB);
+ }
+
dout(10) << "starting a new " << (scrubber.is_chunky ? "chunky" : "classic") << " scrub" << dendl;
}
@@ -3548,7 +3582,7 @@ void PG::chunky_scrub() {
// request maps from replicas
for (unsigned i=1; i<acting.size(); i++) {
_request_scrub_map(acting[i], scrubber.subset_last_update,
- scrubber.start, scrubber.end);
+ scrubber.start, scrubber.end, scrubber.deep);
scrubber.waiting_on_whom.insert(acting[i]);
++scrubber.waiting_on;
}
@@ -3580,7 +3614,9 @@ void PG::chunky_scrub() {
assert(last_update_applied >= scrubber.subset_last_update);
// build my own scrub map
- ret = build_scrub_map_chunk(scrubber.primary_scrubmap, scrubber.start, scrubber.end);
+ ret = build_scrub_map_chunk(scrubber.primary_scrubmap,
+ scrubber.start, scrubber.end,
+ scrubber.deep);
if (ret < 0) {
dout(5) << "error building scrub map: " << ret << ", aborting" << dendl;
scrub_clear_state();
@@ -3645,6 +3681,7 @@ void PG::scrub_clear_state()
assert(_lock.is_locked());
state_clear(PG_STATE_SCRUBBING);
state_clear(PG_STATE_REPAIR);
+ state_clear(PG_STATE_DEEP_SCRUB);
update_stats();
// active -> nothing.
@@ -3691,6 +3728,16 @@ bool PG::_compare_scrub_objects(ScrubMap::object &auth,
errorstream << "size " << candidate.size
<< " != known size " << auth.size;
}
+ if (auth.digest_present && candidate.digest_present) {
+ if (auth.digest != candidate.digest) {
+ if (!ok)
+ errorstream << ", ";
+ ok = false;
+
+ errorstream << "digest " << candidate.digest
+ << " != known digest " << auth.digest;
+ }
+ }
for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
i != auth.attrs.end();
i++) {
@@ -3755,6 +3802,7 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps,
j->second->objects[*k],
ss)) {
cur_inconsistent.insert(j->first);
+ ++scrubber.errors;
errorstream << info.pgid << " osd." << acting[j->first]
<< ": soid " << *k << " " << ss.str() << std::endl;
}
@@ -3781,7 +3829,8 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps,
void PG::scrub_compare_maps() {
dout(10) << "scrub_compare_maps has maps, analyzing" << dendl;
bool repair = state_test(PG_STATE_REPAIR);
- const char *mode = repair ? "repair":"scrub";
+ bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
+ const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
if (acting.size() > 1) {
dout(10) << "scrub comparing replica scrub maps" << dendl;
@@ -3837,6 +3886,7 @@ void PG::scrub_compare_maps() {
&maps[i->second]->objects[i->first],
acting[*j],
acting[i->second]);
+ ++scrubber.fixed;
}
}
@@ -3883,7 +3933,8 @@ void PG::scrub_finalize() {
// the part that actually finalizes a scrub
void PG::scrub_finish() {
bool repair = state_test(PG_STATE_REPAIR);
- const char *mode = repair ? "repair":"scrub";
+ bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
+ const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
// type-specific finish (can tally more errors)
_scrub_finish();
@@ -3911,6 +3962,10 @@ void PG::scrub_finish() {
osd->unreg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp);
info.history.last_scrub = info.last_update;
info.history.last_scrub_stamp = ceph_clock_now(g_ceph_context);
+ if (scrubber.deep) {
+ info.history.last_deep_scrub = info.last_update;
+ info.history.last_deep_scrub_stamp = ceph_clock_now(g_ceph_context);
+ }
osd->reg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp);
{
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 5c39cd89f10..0ad34514ea8 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -778,7 +778,8 @@ public:
reserved(false), reserve_failed(false),
block_writes(false), active(false), waiting_on(0),
errors(0), fixed(0), active_rep_scrub(0),
- finalizing(false), is_chunky(false), state(INACTIVE)
+ finalizing(false), is_chunky(false), state(INACTIVE),
+ deep(false)
{
}
@@ -818,6 +819,9 @@ public:
FINISH,
} state;
+ // deep scrub
+ bool deep;
+
static const char *state_string(const PG::Scrubber::State& state) {
const char *ret = NULL;
switch( state )
@@ -855,6 +859,7 @@ public:
subset_last_update = eversion_t();
errors = 0;
fixed = 0;
+ deep = false;
}
} scrubber;
@@ -878,10 +883,12 @@ public:
void scrub_finish();
void scrub_clear_state();
bool scrub_gather_replica_maps();
- void _scan_list(ScrubMap &map, vector<hobject_t> &ls);
+ void _scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep);
void _request_scrub_map_classic(int replica, eversion_t version);
- void _request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end);
- int build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end);
+ void _request_scrub_map(int replica, eversion_t version,
+ hobject_t start, hobject_t end, bool deep);
+ int build_scrub_map_chunk(ScrubMap &map,
+ hobject_t start, hobject_t end, bool deep);
void build_scrub_map(ScrubMap &map);
void build_inc_scrub_map(ScrubMap &map, eversion_t v);
virtual void _scrub(ScrubMap &map) { }
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 01cab81e52b..332e0501b9e 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -6596,7 +6596,8 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap)
coll_t c(info.pgid);
bool repair = state_test(PG_STATE_REPAIR);
- const char *mode = repair ? "repair":"scrub";
+ bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
+ const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
// traverse in reverse order.
hobject_t head;
@@ -6685,6 +6686,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap)
//bufferlist data;
//osd->store->read(c, poid, 0, 0, data);
//assert(data.length() == p->size);
+ //
if (soid.snap == CEPH_NOSNAP) {
if (!snapset.head_exists) {
@@ -6737,7 +6739,8 @@ void ReplicatedPG::_scrub_clear_state()
void ReplicatedPG::_scrub_finish()
{
bool repair = state_test(PG_STATE_REPAIR);
- const char *mode = repair ? "repair":"scrub";
+ bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
+ const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
dout(10) << mode << " got "
<< scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, "
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index cdbdfa95704..a981d02f900 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -406,6 +406,8 @@ std::string pg_state_string(int state)
oss << "remapped+";
if (state & PG_STATE_SCRUBBING)
oss << "scrubbing+";
+ if (state & PG_STATE_DEEP_SCRUB)
+ oss << "deep+";
if (state & PG_STATE_SCRUBQ)
oss << "scrubq+";
if (state & PG_STATE_INCONSISTENT)
@@ -971,6 +973,8 @@ void pg_stat_t::dump(Formatter *f) const
f->dump_unsigned("parent_split_bits", parent_split_bits);
f->dump_stream("last_scrub") << last_scrub;
f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
+ f->dump_stream("last_deep_scrub") << last_deep_scrub;
+ f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
f->dump_unsigned("log_size", log_size);
f->dump_unsigned("ondisk_log_size", ondisk_log_size);
stats.dump(f);
@@ -986,7 +990,7 @@ void pg_stat_t::dump(Formatter *f) const
void pg_stat_t::encode(bufferlist &bl) const
{
- ENCODE_START(9, 8, bl);
+ ENCODE_START(10, 8, bl);
::encode(version, bl);
::encode(reported, bl);
::encode(state, bl);
@@ -1009,12 +1013,14 @@ void pg_stat_t::encode(bufferlist &bl) const
::encode(last_clean, bl);
::encode(last_unstale, bl);
::encode(mapping_epoch, bl);
+ ::encode(last_deep_scrub, bl);
+ ::encode(last_deep_scrub_stamp, bl);
ENCODE_FINISH(bl);
}
void pg_stat_t::decode(bufferlist::iterator &bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(9, 8, 8, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(10, 8, 8, bl);
::decode(version, bl);
::decode(reported, bl);
::decode(state, bl);
@@ -1072,6 +1078,10 @@ void pg_stat_t::decode(bufferlist::iterator &bl)
::decode(last_clean, bl);
::decode(last_unstale, bl);
::decode(mapping_epoch, bl);
+ if (struct_v >= 10) {
+ ::decode(last_deep_scrub, bl);
+ ::decode(last_deep_scrub_stamp, bl);
+ }
}
}
DECODE_FINISH(bl);
@@ -1099,6 +1109,8 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
a.parent_split_bits = 12;
a.last_scrub = eversion_t(9, 10);
a.last_scrub_stamp = utime_t(11, 12);
+ a.last_deep_scrub = eversion_t(13, 14);
+ a.last_deep_scrub_stamp = utime_t(15, 16);
list<object_stat_collection_t*> l;
object_stat_collection_t::generate_test_instances(l);
a.stats = *l.back();
@@ -1177,7 +1189,7 @@ void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
void pg_history_t::encode(bufferlist &bl) const
{
- ENCODE_START(4, 4, bl);
+ ENCODE_START(5, 4, bl);
::encode(epoch_created, bl);
::encode(last_epoch_started, bl);
::encode(last_epoch_clean, bl);
@@ -1187,12 +1199,14 @@ void pg_history_t::encode(bufferlist &bl) const
::encode(same_primary_since, bl);
::encode(last_scrub, bl);
::encode(last_scrub_stamp, bl);
+ ::encode(last_deep_scrub, bl);
+ ::encode(last_deep_scrub_stamp, bl);
ENCODE_FINISH(bl);
}
void pg_history_t::decode(bufferlist::iterator &bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, bl);
::decode(epoch_created, bl);
::decode(last_epoch_started, bl);
if (struct_v >= 3)
@@ -1206,6 +1220,10 @@ void pg_history_t::decode(bufferlist::iterator &bl)
if (struct_v >= 2) {
::decode(last_scrub, bl);
::decode(last_scrub_stamp, bl);
+ if (struct_v >= 5) {
+ ::decode(last_deep_scrub, bl);
+ ::decode(last_deep_scrub_stamp, bl);
+ }
}
DECODE_FINISH(bl);
}
@@ -1221,6 +1239,8 @@ void pg_history_t::dump(Formatter *f) const
f->dump_int("same_primary_since", same_primary_since);
f->dump_stream("last_scrub") << last_scrub;
f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
+ f->dump_stream("last_deep_scrub") << last_deep_scrub;
+ f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
}
void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
@@ -1235,7 +1255,9 @@ void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
o.back()->same_interval_since = 6;
o.back()->same_primary_since = 7;
o.back()->last_scrub = eversion_t(8, 9);
- o.back()->last_scrub_stamp = utime_t(10, 11);
+ o.back()->last_scrub_stamp = utime_t(10, 11);
+ o.back()->last_deep_scrub = eversion_t(12, 13);
+ o.back()->last_deep_scrub_stamp = utime_t(14, 15);
}
@@ -2591,19 +2613,29 @@ void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
void ScrubMap::object::encode(bufferlist& bl) const
{
- ENCODE_START(2, 2, bl);
+ ENCODE_START(3, 2, bl);
::encode(size, bl);
::encode(negative, bl);
::encode(attrs, bl);
+ ::encode(digest, bl);
+ ::encode(digest_present, bl);
ENCODE_FINISH(bl);
}
void ScrubMap::object::decode(bufferlist::iterator& bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
::decode(size, bl);
::decode(negative, bl);
::decode(attrs, bl);
+ if (struct_v >= 3) {
+ ::decode(digest, bl);
+ ::decode(digest_present, bl);
+ }
+ else {
+ digest = 0;
+ digest_present = false;
+ }
DECODE_FINISH(bl);
}
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 7cb590bdea6..b2b59b33f18 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -564,6 +564,7 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) {
#define PG_STATE_INCOMPLETE (1<<16) // incomplete content, peering failed.
#define PG_STATE_STALE (1<<17) // our state for this pg is stale, unknown.
#define PG_STATE_REMAPPED (1<<18) // pg is explicitly remapped to different OSDs than CRUSH
+#define PG_STATE_DEEP_SCRUB (1<<19) // deep scrub: check CRC32 on files
std::string pg_state_string(int state);
@@ -862,7 +863,9 @@ struct pg_stat_t {
__u32 parent_split_bits;
eversion_t last_scrub;
+ eversion_t last_deep_scrub;
utime_t last_scrub_stamp;
+ utime_t last_deep_scrub_stamp;
object_stat_collection_t stats;
@@ -951,7 +954,9 @@ struct pg_history_t {
epoch_t same_primary_since; // same primary at least back through this epoch.
eversion_t last_scrub;
+ eversion_t last_deep_scrub;
utime_t last_scrub_stamp;
+ utime_t last_deep_scrub_stamp;
pg_history_t()
: epoch_created(0),
@@ -985,6 +990,14 @@ struct pg_history_t {
last_scrub_stamp = other.last_scrub_stamp;
modified = true;
}
+ if (other.last_deep_scrub > last_deep_scrub) {
+ last_deep_scrub = other.last_deep_scrub;
+ modified = true;
+ }
+ if (other.last_deep_scrub_stamp > last_deep_scrub_stamp) {
+ last_deep_scrub_stamp = other.last_deep_scrub_stamp;
+ modified = true;
+ }
return modified;
}
@@ -1777,8 +1790,10 @@ struct ScrubMap {
uint64_t size;
bool negative;
map<string,bufferptr> attrs;
+ __u32 digest;
+ bool digest_present;
- object(): size(0), negative(false) {}
+ object(): size(0), negative(false), digest(0), digest_present(false) {}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
diff --git a/src/test/cli/ceph/help.t b/src/test/cli/ceph/help.t
index 47e5b385008..bbd7ff76d9a 100644
--- a/src/test/cli/ceph/help.t
+++ b/src/test/cli/ceph/help.t
@@ -56,6 +56,7 @@
ceph osd pool rename <pool> <new pool name>
ceph osd pool set <pool> <field> <value>
ceph osd scrub <osd-id>
+ ceph osd deep-scrub <osd-id>
ceph osd repair <osd-id>
ceph osd tell N bench [bytes per write] [total bytes]
diff --git a/src/tools/ceph.cc b/src/tools/ceph.cc
index b82be10b9bf..278033c46cb 100644
--- a/src/tools/ceph.cc
+++ b/src/tools/ceph.cc
@@ -99,6 +99,7 @@ static void usage()
cout << " ceph osd pool rename <pool> <new pool name>\n";
cout << " ceph osd pool set <pool> <field> <value>\n";
cout << " ceph osd scrub <osd-id>\n";
+ cout << " ceph osd deep-scrub <osd-id>\n";
cout << " ceph osd repair <osd-id>\n";
cout << " ceph osd tell N bench [bytes per write] [total bytes]\n";
cout << "\n";