summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick Donnelly <pdonnell@redhat.com> 2021-07-14 18:02:20 -0700
committer Patrick Donnelly <pdonnell@redhat.com> 2021-07-29 08:50:15 -0700
commit 75661374ff7fa14bf9bc053b9389571d8aa68f7a (patch)
tree 1c7ca5268e42cbef06bd478465e3954536a756ae
parent 60ad0ca622cf64dad99f89cea2b520dce1322fbf (diff)
download ceph-75661374ff7fa14bf9bc053b9389571d8aa68f7a.tar.gz
mon/MDSMonitor: propose if FSMap struct_v is too old
To flush older versions which may still be an empty MDSMap (for clusters that have never used CephFS), we need to force a proposal so older versions of the struct are trimmed.

This is the main fix of this branch. We removed code which processed old encodings of the MDSMap in the mon store via 60bc524. That broke old ceph clusters which never used CephFS (see cited ticket below). This is because the initial epoch is an empty MDSMap (back in Infernalis/Hammer) that is never updated. So, the fix here is to just do proposals periodically until all of the old structs are automatically trimmed by the mons.

Fixes: 60bc524827bac072658203e56b1fa3dede9641c5
Fixes: https://tracker.ceph.com/issues/51673
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
(cherry picked from commit 56c3fc802ee8848ba85da4300adcc2ee8bd95416)

Conflicts:
    src/mds/FSMap.cc: adjust for octopus which decodes old MDSMaps
    src/mon/MDSMonitor.h: trivial conflicts
-rw-r--r-- src/mds/FSMap.cc 5
-rw-r--r-- src/mds/FSMap.h 14
-rw-r--r-- src/mon/MDSMonitor.cc 28
-rw-r--r-- src/mon/MDSMonitor.h 7
4 files changed, 51 insertions, 3 deletions
diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc
index 8f4cffa43a0..f8bebc0507d 100644
--- a/src/mds/FSMap.cc
+++ b/src/mds/FSMap.cc
@@ -472,7 +472,7 @@ void FSMap::update_compat(const CompatSet &c)
void FSMap::encode(bufferlist& bl, uint64_t features) const
{
- ENCODE_START(7, 6, bl);
+ ENCODE_START(STRUCT_VERSION, 6, bl);
encode(epoch, bl);
encode(next_filesystem_id, bl);
encode(legacy_client_fscid, bl);
@@ -497,7 +497,8 @@ void FSMap::decode(bufferlist::const_iterator& p)
// MDSMonitor to store an FSMap instead of an MDSMap was
// 5, so anything older than 6 is decoded as an MDSMap,
// and anything newer is decoded as an FSMap.
- DECODE_START_LEGACY_COMPAT_LEN_16(7, 4, 4, p);
+ DECODE_START_LEGACY_COMPAT_LEN_16(STRUCT_VERSION, 4, 4, p);
+ struct_version = struct_v;
if (struct_v < 6) {
// Because the mon used to store an MDSMap where we now
// store an FSMap, FSMap knows how to decode the legacy
diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h
index feed962c818..72e57231d0e 100644
--- a/src/mds/FSMap.h
+++ b/src/mds/FSMap.h
@@ -88,6 +88,9 @@ public:
friend class PaxosFSMap;
using mds_info_t = MDSMap::mds_info_t;
+ static const version_t STRUCT_VERSION = 7;
+ static const version_t STRUCT_VERSION_TRIM_TO = 7;
+
FSMap() : compat(MDSMap::get_compat_set_default()) {}
FSMap(const FSMap &rhs)
@@ -100,7 +103,8 @@ public:
ever_enabled_multiple(rhs.ever_enabled_multiple),
mds_roles(rhs.mds_roles),
standby_daemons(rhs.standby_daemons),
- standby_epochs(rhs.standby_epochs)
+ standby_epochs(rhs.standby_epochs),
+ struct_version(rhs.struct_version)
{
filesystems.clear();
for (const auto &i : rhs.filesystems) {
@@ -339,6 +343,11 @@ public:
epoch_t get_epoch() const { return epoch; }
void inc_epoch() { epoch++; }
+ version_t get_struct_version() const { return struct_version; }
+ bool is_struct_old() const {
+ return struct_version < STRUCT_VERSION_TRIM_TO;
+ }
+
size_t filesystem_count() const {return filesystems.size();}
bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;}
Filesystem::const_ref get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));}
@@ -409,6 +418,9 @@ protected:
// For MDS daemons not yet assigned to a Filesystem
std::map<mds_gid_t, mds_info_t> standby_daemons;
std::map<mds_gid_t, epoch_t> standby_epochs;
+
+private:
+ epoch_t struct_version = 0;
};
WRITE_CLASS_ENCODER_FEATURES(FSMap)
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 1b7f5dfc31a..21604acb847 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -2179,6 +2179,34 @@ void MDSMonitor::tick()
bool do_propose = false;
bool propose_osdmap = false;
+ if (check_fsmap_struct_version) {
+ /* Allow time for trimming otherwise PaxosService::is_writeable will always
+ * be false.
+ */
+
+ auto now = clock::now();
+ auto elapsed = now - last_fsmap_struct_flush;
+ if (elapsed > std::chrono::seconds(30)) {
+ FSMap fsmap;
+ bufferlist bl;
+ auto v = get_first_committed();
+ int err = get_version(v, bl);
+ if (err) {
+ derr << "could not get version " << v << dendl;
+ ceph_abort();
+ }
+ fsmap.decode(bl);
+ if (fsmap.is_struct_old()) {
+ dout(5) << "fsmap struct is too old; proposing to flush out old versions" << dendl;
+ do_propose = true;
+ last_fsmap_struct_flush = now;
+ } else {
+ dout(20) << "struct is recent" << dendl;
+ check_fsmap_struct_version = false;
+ }
+ }
+ }
+
do_propose |= pending.check_health();
/* Check health and affinity of ranks */
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h
index 56762a8afb7..51be614dae1 100644
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -32,6 +32,9 @@ class FileSystemCommandHandler;
class MDSMonitor : public PaxosService, public PaxosFSMap, protected CommandHandler {
public:
+ using clock = ceph::coarse_mono_clock;
+ using time = ceph::coarse_mono_time;
+
MDSMonitor(Monitor *mn, Paxos *p, string service_name);
// service methods
@@ -142,6 +145,10 @@ protected:
// when the mon was not updating us for some period (e.g. during slow
// election) to reset last_beacon timeouts
mono_time last_tick = mono_clock::zero();
+
+private:
+ time last_fsmap_struct_flush = clock::zero();
+ bool check_fsmap_struct_version = true;
};
#endif