summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Greg Farnum <greg@inktank.com> 2013-04-30 12:02:20 -0700
committer Greg Farnum <greg@inktank.com> 2013-04-30 13:50:40 -0700
commit cedcb1934f64325a452468576ead56ed11e7a03a (patch)
tree 0ac68c3dc23317a28940b797da6bcccfc722b4c5
parent 6ae9bbb5d03cb5695a4ebb7a3c20f729de1bd67a (diff)
download ceph-cedcb1934f64325a452468576ead56ed11e7a03a.tar.gz
Revert "mon: when electing, be sure acked leaders have new enough stores to lead"
This was somehow broken -- out-of-date leaders were being elected -- and we've decided smaller band-aids are more appropriate.

We don't completely revert the MMonElection changes, though -- there have been user clusters running the code which includes these messages so we can't pretend it never happened. We can make them clearly unused in the code, though.

This reverts commit fcaabf1a22723c571c10d402464071c6405607c0.

Signed-off-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- src/messages/MMonElection.h 23
-rw-r--r-- src/mon/Elector.cc 34
-rw-r--r-- src/mon/Elector.h 14
3 files changed, 25 insertions, 46 deletions
diff --git a/src/messages/MMonElection.h b/src/messages/MMonElection.h
index 9771f6123d6..3d7dd4ec90e 100644
--- a/src/messages/MMonElection.h
+++ b/src/messages/MMonElection.h
@@ -45,19 +45,20 @@ public:
bufferlist monmap_bl;
set<int> quorum;
uint64_t quorum_features;
- version_t paxos_first_version;
- version_t paxos_last_version;
+ /* the following were both used in the next branch for a while
+ * on user cluster, so we've left them in for compatibility. */
+ version_t defunct_one;
+ version_t defunct_two;
MMonElection() : Message(MSG_MON_ELECTION, HEAD_VERSION, COMPAT_VERSION),
- op(0), epoch(0), quorum_features(0), paxos_first_version(0),
- paxos_last_version(0)
+ op(0), epoch(0), quorum_features(0), defunct_one(0),
+ defunct_two(0)
{ }
- MMonElection(int o, epoch_t e, MonMap *m,
- version_t paxos_first, version_t paxos_last)
+ MMonElection(int o, epoch_t e, MonMap *m)
: Message(MSG_MON_ELECTION, HEAD_VERSION, COMPAT_VERSION),
fsid(m->fsid), op(o), epoch(e), quorum_features(0),
- paxos_first_version(paxos_first), paxos_last_version(paxos_last)
+ defunct_one(0), defunct_two(0)
{
// encode using full feature set; we will reencode for dest later,
// if necessary
@@ -87,8 +88,8 @@ public:
::encode(monmap_bl, payload);
::encode(quorum, payload);
::encode(quorum_features, payload);
- ::encode(paxos_first_version, payload);
- ::encode(paxos_last_version, payload);
+ ::encode(defunct_one, payload);
+ ::encode(defunct_two, payload);
}
void decode_payload() {
bufferlist::iterator p = payload.begin();
@@ -105,8 +106,8 @@ public:
else
quorum_features = 0;
if (header.version >= 4) {
- ::decode(paxos_first_version, p);
- ::decode(paxos_last_version, p);
+ ::decode(defunct_one, p);
+ ::decode(defunct_two, p);
}
}
diff --git a/src/mon/Elector.cc b/src/mon/Elector.cc
index b6f047e20d2..32d78b4eb4b 100644
--- a/src/mon/Elector.cc
+++ b/src/mon/Elector.cc
@@ -81,21 +81,18 @@ void Elector::start()
electing_me = true;
acked_me[mon->rank] = CEPH_FEATURES_ALL;
leader_acked = -1;
- acked_first_paxos_version = mon->paxos->get_first_committed();
// bcast to everyone else
for (unsigned i=0; i<mon->monmap->size(); ++i) {
if ((int)i == mon->rank) continue;
- Message *m = new MMonElection(MMonElection::OP_PROPOSE, epoch, mon->monmap,
- mon->paxos->get_first_committed(),
- mon->paxos->get_version());
+ Message *m = new MMonElection(MMonElection::OP_PROPOSE, epoch, mon->monmap);
mon->messenger->send_message(m, mon->monmap->get_inst(i));
}
reset_timer();
}
-void Elector::defer(int who, version_t paxos_first)
+void Elector::defer(int who)
{
dout(5) << "defer to " << who << dendl;
@@ -107,11 +104,8 @@ void Elector::defer(int who, version_t paxos_first)
// ack them
leader_acked = who;
- acked_first_paxos_version = paxos_first;
ack_stamp = ceph_clock_now(g_ceph_context);
- mon->messenger->send_message(new MMonElection(MMonElection::OP_ACK, epoch, mon->monmap,
- mon->paxos->get_first_committed(),
- mon->paxos->get_version()),
+ mon->messenger->send_message(new MMonElection(MMonElection::OP_ACK, epoch, mon->monmap),
mon->monmap->get_inst(who));
// set a timer
@@ -175,10 +169,7 @@ void Elector::victory()
p != quorum.end();
++p) {
if (*p == mon->rank) continue;
- MMonElection *m = new MMonElection(MMonElection::OP_VICTORY, epoch,
- mon->monmap,
- mon->paxos->get_first_committed(),
- mon->paxos->get_version());
+ MMonElection *m = new MMonElection(MMonElection::OP_VICTORY, epoch, mon->monmap);
m->quorum = quorum;
mon->messenger->send_message(m, mon->monmap->get_inst(*p));
}
@@ -214,13 +205,10 @@ void Elector::handle_propose(MMonElection *m)
}
}
- if ((mon->rank < from) &&
- // be careful that we have new enough data to be leader!
- (m->paxos_first_version <= mon->paxos->get_version())) {
+ if (mon->rank < from) {
// i would win over them.
if (leader_acked >= 0) { // we already acked someone
- assert((leader_acked < from) || // and they still win, of course
- (acked_first_paxos_version > mon->paxos->get_version()));
+ assert(leader_acked < from); // and they still win, of course
dout(5) << "no, we already acked " << leader_acked << dendl;
} else {
// wait, i should win!
@@ -229,20 +217,16 @@ void Elector::handle_propose(MMonElection *m)
mon->start_election();
}
}
- } else if (m->paxos_last_version >= mon->paxos->get_first_committed()) {
+ } else {
// they would win over me
if (leader_acked < 0 || // haven't acked anyone yet, or
leader_acked > from || // they would win over who you did ack, or
- leader_acked == from) { // this is the guy we're already deferring to
- defer(from, m->paxos_first_version);
+ leader_acked == from) { // this is the guy we're already deferring to
+ defer(from);
} else {
// ignore them!
dout(5) << "no, we already acked " << leader_acked << dendl;
}
- } else { // they are too out-of-date
- dout(5) << "no, they are too far behind; paxos version: "
- << m->paxos_last_version << " versus my first "
- << mon->paxos->get_first_committed() << dendl;
}
m->put();
diff --git a/src/mon/Elector.h b/src/mon/Elector.h
index 9cce81e9f49..d81eb239763 100644
--- a/src/mon/Elector.h
+++ b/src/mon/Elector.h
@@ -126,10 +126,6 @@ class Elector {
*/
int leader_acked;
/**
- * Indicates the first_paxos_commit on who we've acked
- */
- version_t acked_first_paxos_version;
- /**
* Indicates when we have acked him
*/
utime_t ack_stamp;
@@ -201,17 +197,16 @@ class Elector {
* to become the Leader. We will only defer an election if the monitor we
* are deferring to outranks us.
*
- * @pre @p who outranks us (who < our rank, or we're behind their store)
+ * @pre @p who outranks us (i.e., who < our rank)
* @pre @p who outranks any other monitor we have deferred to in the past
* @post electing_me is false
* @post leader_acked equals @p who
* @post we sent an ack message to @p who
* @post we reset the expire_event timer
*
- * @param who Some other monitor's numeric identifier.
- * @param paxos_first The other monitor's first committed paxos version
+ * @param who Some other monitor's numeric identifier.
*/
- void defer(int who, version_t paxos_first);
+ void defer(int who);
/**
* The election has taken too long and has expired.
*
@@ -331,8 +326,7 @@ class Elector {
epoch(0),
participating(true),
electing_me(false),
- leader_acked(-1),
- acked_first_paxos_version(0) { }
+ leader_acked(-1) { }
/**
* Initiate the Elector class.