diff options
author | Sage Weil <sage@inktank.com> | 2013-07-12 15:21:20 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-07-12 15:21:20 -0700 |
commit | 65af2538329472d2fd078bb961863c40cdabda12 (patch) | |
tree | 44bb95a427e009c851a74ab750b8ba8b15136896 | |
parent | e537699b33f84c14f027b56372fbcb0a99bbe88d (diff) | |
parent | 804314b8bfa5ec75cc9653e2928874c457395c92 (diff) | |
download | ceph-65af2538329472d2fd078bb961863c40cdabda12.tar.gz |
Merge remote-tracking branch 'gh/wip-mon-sync-2' into cuttlefish
Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- | src/common/config_opts.h | 3 |
-rw-r--r-- | src/mon/Monitor.cc | 67 |
-rw-r--r-- | src/mon/Monitor.h | 14 |
-rw-r--r-- | src/mon/MonitorDBStore.h | 10 |
-rw-r--r-- | src/mon/Paxos.cc | 11 |
-rw-r--r-- | src/mon/Paxos.h | 10 |
6 files changed, 83 insertions, 32 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 11da6c5bb21..d040c9b0d9b 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -198,9 +198,10 @@ OPTION(mon_leveldb_compression, OPT_BOOL, false) // monitor's leveldb uses compr OPTION(mon_leveldb_paranoid, OPT_BOOL, false) // monitor's leveldb paranoid flag OPTION(mon_leveldb_log, OPT_STR, "") OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state -OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores +OPTION(paxos_max_join_drift, OPT_INT, 100) // max paxos iterations before we must first sync the monitor stores OPTION(paxos_propose_interval, OPT_DOUBLE, 1.0) // gather updates for this long before proposing a map update OPTION(paxos_min_wait, OPT_DOUBLE, 0.05) // min time to gather updates for after period of inactivity +OPTION(paxos_min, OPT_INT, 500) // minimum number of paxos transactions to keep around OPTION(paxos_trim_min, OPT_INT, 500) // number of extra proposals tolerated before trimming OPTION(paxos_trim_max, OPT_INT, 1000) // max number of extra proposals to trim at a time OPTION(paxos_trim_disabled_max_versions, OPT_INT, 108000) // maximum amount of versions we shall allow passing by without trimming diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index bee72a71a5e..b3cb45a1572 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -1036,6 +1036,22 @@ void Monitor::sync_finish(entity_inst_t &entity, bool abort) finish_contexts(g_ceph_context, maybe_wait_for_quorum); } +void Monitor::_trim_enable() +{ + Mutex::Locker l(trim_lock); + // even if we are no longer the leader, we should re-enable trim if + // we have disabled it in the past. It doesn't mean we are going to + // do anything about it, but if we happen to become the leader + // sometime down the future, we sure want to have the trim enabled. 
+ if (trim_timeouts.empty()) { + dout(10) << __func__ << " enabling" << dendl; + paxos->trim_enable(); + } else { + dout(10) << __func__ << " NOT enabling" << dendl; + } + trim_enable_timer = NULL; +} + void Monitor::handle_sync_finish(MMonSync *m) { dout(10) << __func__ << " " << *m << dendl; @@ -1214,14 +1230,30 @@ void Monitor::handle_sync_start_chunks(MMonSync *m) } SyncEntity sync = get_sync_entity(other, this); - sync->version = paxos->get_version(); if (!m->last_key.first.empty() && !m->last_key.second.empty()) { - sync->last_received_key = m->last_key; - dout(10) << __func__ << " set last received key to (" - << sync->last_received_key.first << "," - << sync->last_received_key.second << ")" << dendl; + if (m->version == 0) { + // uh-oh; we can't do this safely without a proper version marker + // because we don't know what paxos commits they got from the + // previous keys (if any!), and we may miss some. + dout(1) << __func__ << " got mid-sync start_chunks from " << other + << " without version marker; ignoring last_received_key marker" << dendl; + sync->version = paxos->get_version(); + } else { + sync->version = m->version; + sync->last_received_key = m->last_key; + dout(10) << __func__ << " set last received key to (" + << sync->last_received_key.first << "," + << sync->last_received_key.second << ")" << dendl; + } + } else { + sync->version = paxos->get_version(); } + dout(10) << __func__ << " version " << sync->version + << " last received key (" + << sync->last_received_key.first << "," + << sync->last_received_key.second << ")" + << dendl; sync->sync_init(); @@ -1268,8 +1300,27 @@ void Monitor::sync_send_chunks(SyncEntity sync) assert(sync->synchronizer->has_next_chunk()); MMonSync *msg = new MMonSync(MMonSync::OP_CHUNK); + MonitorDBStore::Transaction tx; + + // include any recent paxos commits + if (sync->version < paxos->get_version()) { + while (sync->version < paxos->get_version()) { // FIXME: limit size? 
+ sync->version++; + dout(10) << " including paxos version " << sync->version << dendl; + bufferlist bl; + store->get(paxos->get_name(), sync->version, bl); + tx.put(paxos->get_name(), sync->version, bl); + } + dout(10) << " included paxos through " << sync->version << dendl; + msg->version = sync->version; + } + + // get next bunch of commits in the remaining space + sync->synchronizer->get_chunk_tx(tx); + + if (!tx.empty()) + tx.encode(msg->chunk_bl); - sync->synchronizer->get_chunk(msg->chunk_bl); msg->last_key = sync->synchronizer->get_last_key(); dout(10) << __func__ << " last key (" << msg->last_key.first << "," @@ -1517,8 +1568,10 @@ void Monitor::sync_start_chunks(SyncEntity provider) g_conf->mon_sync_timeout); MMonSync *msg = new MMonSync(MMonSync::OP_START_CHUNKS); pair<string,string> last_key = provider->last_received_key; - if (!last_key.first.empty() && !last_key.second.empty()) + if (!last_key.first.empty() && !last_key.second.empty()) { msg->last_key = last_key; + msg->version = store->get("paxos", "last_committed"); + } assert(g_conf->mon_sync_requester_kill_at != 4); messenger->send_message(msg, provider->entity); diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 1dd100f6616..0c5e0fb8e0a 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -369,17 +369,12 @@ private: C_TrimEnable(Monitor *m) : mon(m) { } void finish(int r) { - Mutex::Locker(mon->trim_lock); - // even if we are no longer the leader, we should re-enable trim if - // we have disabled it in the past. It doesn't mean we are going to - // do anything about it, but if we happen to become the leader - // sometime down the future, we sure want to have the trim enabled. 
- if (mon->trim_timeouts.empty()) - mon->paxos->trim_enable(); - mon->trim_enable_timer = NULL; + mon->_trim_enable(); } }; + void _trim_enable(); + void sync_obtain_latest_monmap(bufferlist &bl); void sync_store_init(); void sync_store_cleanup(); @@ -628,9 +623,6 @@ private: string prefix("paxos"); paxos_synchronizer = mon->store->get_synchronizer(prefix); - version = mon->paxos->get_version(); - generic_dout(10) << __func__ << " version " << version << dendl; - synchronizer = mon->store->get_synchronizer(last_received_key, sync_targets); sync_update(); diff --git a/src/mon/MonitorDBStore.h b/src/mon/MonitorDBStore.h index c140719981b..80bca0f0b59 100644 --- a/src/mon/MonitorDBStore.h +++ b/src/mon/MonitorDBStore.h @@ -277,7 +277,6 @@ class MonitorDBStore return true; } - virtual void _get_chunk(Transaction &tx) = 0; virtual bool _is_valid() = 0; public: @@ -292,9 +291,10 @@ class MonitorDBStore virtual bool has_next_chunk() { return !done && _is_valid(); } - virtual void get_chunk(bufferlist &bl) { + virtual void get_chunk_tx(Transaction &tx) = 0; + virtual void get_chunk(bufferlist& bl) { Transaction tx; - _get_chunk(tx); + get_chunk_tx(tx); if (!tx.empty()) tx.encode(bl); } @@ -325,7 +325,7 @@ class MonitorDBStore * differ from the one passed on to the function) * @param last_key[out] Last key in the chunk */ - virtual void _get_chunk(Transaction &tx) { + virtual void get_chunk_tx(Transaction &tx) { assert(done == false); assert(iter->valid() == true); @@ -371,7 +371,7 @@ class MonitorDBStore virtual ~SinglePrefixStoreIteratorImpl() { } private: - virtual void _get_chunk(Transaction &tx) { + virtual void get_chunk_tx(Transaction &tx) { assert(done == false); assert(iter->valid() == true); diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 1b43da95546..01e06209872 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -1005,7 +1005,16 @@ void Paxos::trim_to(version_t first) } } -void Paxos::trim_enable() { +void Paxos::trim_disable() +{ + dout(10) << 
__func__ << dendl; + if (!trim_disabled_version) + trim_disabled_version = get_version(); +} + +void Paxos::trim_enable() +{ + dout(10) << __func__ << dendl; trim_disabled_version = 0; // We may not be the leader when we reach this function. We sure must // have been the leader at some point, but we may have been demoted and diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index 4f1af82836e..b7cfe6cea0a 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -1151,7 +1151,7 @@ public: */ void trim() { assert(should_trim()); - version_t trim_to_version = MIN(get_version() - g_conf->paxos_max_join_drift, + version_t trim_to_version = MIN(get_version() - g_conf->paxos_min, get_first_committed() + g_conf->paxos_trim_max); trim_to(trim_to_version); } @@ -1161,10 +1161,7 @@ public: * This is required by the Monitor's store synchronization mechanisms * to guarantee a consistent store state. */ - void trim_disable() { - if (!trim_disabled_version) - trim_disabled_version = get_version(); - } + void trim_disable(); /** * Enable trimming */ @@ -1185,8 +1182,7 @@ public: */ bool should_trim() { int available_versions = (get_version() - get_first_committed()); - int maximum_versions = - (g_conf->paxos_max_join_drift + g_conf->paxos_trim_min); + int maximum_versions = (g_conf->paxos_min + g_conf->paxos_trim_min); if (going_to_trim || (available_versions <= maximum_versions)) return false; |