summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2013-07-12 15:21:20 -0700
committer Sage Weil <sage@inktank.com> 2013-07-12 15:21:20 -0700
commit 65af2538329472d2fd078bb961863c40cdabda12 (patch)
tree 44bb95a427e009c851a74ab750b8ba8b15136896
parent e537699b33f84c14f027b56372fbcb0a99bbe88d (diff)
parent 804314b8bfa5ec75cc9653e2928874c457395c92 (diff)
download ceph-65af2538329472d2fd078bb961863c40cdabda12.tar.gz
Merge remote-tracking branch 'gh/wip-mon-sync-2' into cuttlefish
Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- src/common/config_opts.h 3
-rw-r--r-- src/mon/Monitor.cc 67
-rw-r--r-- src/mon/Monitor.h 14
-rw-r--r-- src/mon/MonitorDBStore.h 10
-rw-r--r-- src/mon/Paxos.cc 11
-rw-r--r-- src/mon/Paxos.h 10
6 files changed, 83 insertions, 32 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 11da6c5bb21..d040c9b0d9b 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -198,9 +198,10 @@ OPTION(mon_leveldb_compression, OPT_BOOL, false) // monitor's leveldb uses compr
OPTION(mon_leveldb_paranoid, OPT_BOOL, false) // monitor's leveldb paranoid flag
OPTION(mon_leveldb_log, OPT_STR, "")
OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state
-OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
+OPTION(paxos_max_join_drift, OPT_INT, 100) // max paxos iterations before we must first sync the monitor stores
OPTION(paxos_propose_interval, OPT_DOUBLE, 1.0) // gather updates for this long before proposing a map update
OPTION(paxos_min_wait, OPT_DOUBLE, 0.05) // min time to gather updates for after period of inactivity
+OPTION(paxos_min, OPT_INT, 500) // minimum number of paxos transactions to keep around
OPTION(paxos_trim_min, OPT_INT, 500) // number of extra proposals tolerated before trimming
OPTION(paxos_trim_max, OPT_INT, 1000) // max number of extra proposals to trim at a time
OPTION(paxos_trim_disabled_max_versions, OPT_INT, 108000) // maximum amount of versions we shall allow passing by without trimming
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index bee72a71a5e..b3cb45a1572 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -1036,6 +1036,22 @@ void Monitor::sync_finish(entity_inst_t &entity, bool abort)
finish_contexts(g_ceph_context, maybe_wait_for_quorum);
}
+void Monitor::_trim_enable()
+{
+ Mutex::Locker l(trim_lock);
+ // even if we are no longer the leader, we should re-enable trim if
+ // we have disabled it in the past. It doesn't mean we are going to
+ // do anything about it, but if we happen to become the leader
+ // sometime down the future, we sure want to have the trim enabled.
+ if (trim_timeouts.empty()) {
+ dout(10) << __func__ << " enabling" << dendl;
+ paxos->trim_enable();
+ } else {
+ dout(10) << __func__ << " NOT enabling" << dendl;
+ }
+ trim_enable_timer = NULL;
+}
+
void Monitor::handle_sync_finish(MMonSync *m)
{
dout(10) << __func__ << " " << *m << dendl;
@@ -1214,14 +1230,30 @@ void Monitor::handle_sync_start_chunks(MMonSync *m)
}
SyncEntity sync = get_sync_entity(other, this);
- sync->version = paxos->get_version();
if (!m->last_key.first.empty() && !m->last_key.second.empty()) {
- sync->last_received_key = m->last_key;
- dout(10) << __func__ << " set last received key to ("
- << sync->last_received_key.first << ","
- << sync->last_received_key.second << ")" << dendl;
+ if (m->version == 0) {
+ // uh-oh; we can't do this safely without a proper version marker
+ // because we don't know what paxos commits they got from the
+ // previous keys (if any!), and we may miss some.
+ dout(1) << __func__ << " got mid-sync start_chunks from " << other
+ << " without version marker; ignoring last_received_key marker" << dendl;
+ sync->version = paxos->get_version();
+ } else {
+ sync->version = m->version;
+ sync->last_received_key = m->last_key;
+ dout(10) << __func__ << " set last received key to ("
+ << sync->last_received_key.first << ","
+ << sync->last_received_key.second << ")" << dendl;
+ }
+ } else {
+ sync->version = paxos->get_version();
}
+ dout(10) << __func__ << " version " << sync->version
+ << " last received key ("
+ << sync->last_received_key.first << ","
+ << sync->last_received_key.second << ")"
+ << dendl;
sync->sync_init();
@@ -1268,8 +1300,27 @@ void Monitor::sync_send_chunks(SyncEntity sync)
assert(sync->synchronizer->has_next_chunk());
MMonSync *msg = new MMonSync(MMonSync::OP_CHUNK);
+ MonitorDBStore::Transaction tx;
+
+ // include any recent paxos commits
+ if (sync->version < paxos->get_version()) {
+ while (sync->version < paxos->get_version()) { // FIXME: limit size?
+ sync->version++;
+ dout(10) << " including paxos version " << sync->version << dendl;
+ bufferlist bl;
+ store->get(paxos->get_name(), sync->version, bl);
+ tx.put(paxos->get_name(), sync->version, bl);
+ }
+ dout(10) << " included paxos through " << sync->version << dendl;
+ msg->version = sync->version;
+ }
+
+ // get next bunch of commits in the remaining space
+ sync->synchronizer->get_chunk_tx(tx);
+
+ if (!tx.empty())
+ tx.encode(msg->chunk_bl);
- sync->synchronizer->get_chunk(msg->chunk_bl);
msg->last_key = sync->synchronizer->get_last_key();
dout(10) << __func__ << " last key ("
<< msg->last_key.first << ","
@@ -1517,8 +1568,10 @@ void Monitor::sync_start_chunks(SyncEntity provider)
g_conf->mon_sync_timeout);
MMonSync *msg = new MMonSync(MMonSync::OP_START_CHUNKS);
pair<string,string> last_key = provider->last_received_key;
- if (!last_key.first.empty() && !last_key.second.empty())
+ if (!last_key.first.empty() && !last_key.second.empty()) {
msg->last_key = last_key;
+ msg->version = store->get("paxos", "last_committed");
+ }
assert(g_conf->mon_sync_requester_kill_at != 4);
messenger->send_message(msg, provider->entity);
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index 1dd100f6616..0c5e0fb8e0a 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -369,17 +369,12 @@ private:
C_TrimEnable(Monitor *m) : mon(m) { }
void finish(int r) {
- Mutex::Locker(mon->trim_lock);
- // even if we are no longer the leader, we should re-enable trim if
- // we have disabled it in the past. It doesn't mean we are going to
- // do anything about it, but if we happen to become the leader
- // sometime down the future, we sure want to have the trim enabled.
- if (mon->trim_timeouts.empty())
- mon->paxos->trim_enable();
- mon->trim_enable_timer = NULL;
+ mon->_trim_enable();
}
};
+ void _trim_enable();
+
void sync_obtain_latest_monmap(bufferlist &bl);
void sync_store_init();
void sync_store_cleanup();
@@ -628,9 +623,6 @@ private:
string prefix("paxos");
paxos_synchronizer = mon->store->get_synchronizer(prefix);
- version = mon->paxos->get_version();
- generic_dout(10) << __func__ << " version " << version << dendl;
-
synchronizer = mon->store->get_synchronizer(last_received_key,
sync_targets);
sync_update();
diff --git a/src/mon/MonitorDBStore.h b/src/mon/MonitorDBStore.h
index c140719981b..80bca0f0b59 100644
--- a/src/mon/MonitorDBStore.h
+++ b/src/mon/MonitorDBStore.h
@@ -277,7 +277,6 @@ class MonitorDBStore
return true;
}
- virtual void _get_chunk(Transaction &tx) = 0;
virtual bool _is_valid() = 0;
public:
@@ -292,9 +291,10 @@ class MonitorDBStore
virtual bool has_next_chunk() {
return !done && _is_valid();
}
- virtual void get_chunk(bufferlist &bl) {
+ virtual void get_chunk_tx(Transaction &tx) = 0;
+ virtual void get_chunk(bufferlist& bl) {
Transaction tx;
- _get_chunk(tx);
+ get_chunk_tx(tx);
if (!tx.empty())
tx.encode(bl);
}
@@ -325,7 +325,7 @@ class MonitorDBStore
* differ from the one passed on to the function)
* @param last_key[out] Last key in the chunk
*/
- virtual void _get_chunk(Transaction &tx) {
+ virtual void get_chunk_tx(Transaction &tx) {
assert(done == false);
assert(iter->valid() == true);
@@ -371,7 +371,7 @@ class MonitorDBStore
virtual ~SinglePrefixStoreIteratorImpl() { }
private:
- virtual void _get_chunk(Transaction &tx) {
+ virtual void get_chunk_tx(Transaction &tx) {
assert(done == false);
assert(iter->valid() == true);
diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc
index 1b43da95546..01e06209872 100644
--- a/src/mon/Paxos.cc
+++ b/src/mon/Paxos.cc
@@ -1005,7 +1005,16 @@ void Paxos::trim_to(version_t first)
}
}
-void Paxos::trim_enable() {
+void Paxos::trim_disable()
+{
+ dout(10) << __func__ << dendl;
+ if (!trim_disabled_version)
+ trim_disabled_version = get_version();
+}
+
+void Paxos::trim_enable()
+{
+ dout(10) << __func__ << dendl;
trim_disabled_version = 0;
// We may not be the leader when we reach this function. We sure must
// have been the leader at some point, but we may have been demoted and
diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h
index 4f1af82836e..b7cfe6cea0a 100644
--- a/src/mon/Paxos.h
+++ b/src/mon/Paxos.h
@@ -1151,7 +1151,7 @@ public:
*/
void trim() {
assert(should_trim());
- version_t trim_to_version = MIN(get_version() - g_conf->paxos_max_join_drift,
+ version_t trim_to_version = MIN(get_version() - g_conf->paxos_min,
get_first_committed() + g_conf->paxos_trim_max);
trim_to(trim_to_version);
}
@@ -1161,10 +1161,7 @@ public:
* This is required by the Monitor's store synchronization mechanisms
* to guarantee a consistent store state.
*/
- void trim_disable() {
- if (!trim_disabled_version)
- trim_disabled_version = get_version();
- }
+ void trim_disable();
/**
* Enable trimming
*/
@@ -1185,8 +1182,7 @@ public:
*/
bool should_trim() {
int available_versions = (get_version() - get_first_committed());
- int maximum_versions =
- (g_conf->paxos_max_join_drift + g_conf->paxos_trim_min);
+ int maximum_versions = (g_conf->paxos_min + g_conf->paxos_trim_min);
if (going_to_trim || (available_versions <= maximum_versions))
return false;