summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Samuel Just <sam.just@inktank.com> 2013-09-17 08:26:51 -0700
committer Samuel Just <sam.just@inktank.com> 2013-09-23 22:55:56 -0700
commit f0b030270d4218d4fafad9014d08fce688a0a5c0 (patch)
tree 2484dde649040b51137f019f4b66477ba2f2f63a
parent 857e20227190845ac1c2d2977f7ee97249b189cb (diff)
download ceph-f0b030270d4218d4fafad9014d08fce688a0a5c0.tar.gz
ReplicatedPG: don't rescan the local collection if we can avoid it
Signed-off-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- src/osd/PG.h 1
-rw-r--r-- src/osd/ReplicatedPG.cc 64
-rw-r--r-- src/osd/ReplicatedPG.h 6
3 files changed, 63 insertions, 8 deletions
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 74809eea268..a11b2076c33 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -439,6 +439,7 @@ protected:
*/
struct BackfillInterval {
// info about a backfill interval on a peer
+ eversion_t version; /// version at which the scan occurred
map<hobject_t,eversion_t> objects;
hobject_t begin;
hobject_t end;
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index c831af45a84..0fc8f1f717e 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -7939,14 +7939,8 @@ int ReplicatedPG::recover_backfill(
int local_min = osd->store->get_ideal_list_min();
int local_max = osd->store->get_ideal_list_max();
- // re-scan our local interval to cope with recent changes
- // FIXME: we could track the eversion_t when we last scanned, and invalidate
- // that way. or explicitly modify/invalidate when we actually change specific
- // objects.
- dout(10) << " rescanning local backfill_info from " << backfill_pos << dendl;
- backfill_info.clear();
- osr->flush();
- scan_range(backfill_pos, local_min, local_max, &backfill_info, handle);
+ // update our local interval to cope with recent changes
+ update_range(&backfill_info, handle);
int ops = 0;
map<hobject_t, pair<eversion_t, eversion_t> > to_push;
@@ -8121,11 +8115,64 @@ void ReplicatedPG::prep_backfill_object_push(
start_recovery_op(oid);
recovering.insert(oid);
ObjectContextRef obc = get_object_context(oid, false);
+
+ // We need to take the read_lock here in order to flush in-progress writes
+ obc->ondisk_read_lock();
pgbackend->recover_object(
oid,
ObjectContextRef(),
obc,
h);
+ obc->ondisk_read_unlock();
+}
+
+/**
+ * Bring a previously scanned backfill interval up to date.
+ *
+ * Three cases, keyed on the version recorded in the interval:
+ *  - bi->version >= info.last_update: nothing to do.
+ *  - bi->version >= info.log_tail: replay the pg log entries newer than
+ *    bi->version onto bi->objects instead of rescanning the collection.
+ *  - otherwise: clear the interval and rescan from backfill_pos.
+ *
+ * @param bi      [in,out] interval to update
+ * @param handle  [in] tp handle, forwarded to scan_range on rescan
+ */
+void ReplicatedPG::update_range(
+  BackfillInterval *bi,
+  ThreadPool::TPHandle &handle)
+{
+  if (bi->version >= info.last_update) {
+    dout(10) << __func__<< ": bi is current " << dendl;
+    assert(bi->version == info.last_update);
+    return;
+  }
+
+  if (bi->version >= info.log_tail) {
+    dout(10) << __func__<< ": bi is old, (" << bi->version
+             << ") can be updated with log" << dendl;
+    const list<pg_log_entry_t> &log = pg_log.get_log().log;
+
+    // Walk backwards to the first entry newer than bi->version.  Always
+    // inspect the element *before* the cursor so that end() is never
+    // dereferenced (the cursor itself may legitimately stay at end()).
+    list<pg_log_entry_t>::const_iterator i = log.end();
+    while (i != log.begin()) {
+      list<pg_log_entry_t>::const_iterator prev = i;
+      --prev;
+      if (prev->version <= bi->version)
+        break;
+      i = prev;
+    }
+
+    if (i != log.end()) {
+      dout(10) << __func__ << ": updating from version " << i->version
+               << dendl;
+    }
+    for (; i != log.end(); ++i) {
+      const hobject_t &soid = i->soid;
+      // Only entries that fall inside [begin, end) affect this interval.
+      if (soid >= bi->begin && soid < bi->end) {
+        if (i->is_update()) {
+          dout(10) << __func__ << ": " << i->soid << " updated to version "
+                   << i->version << dendl;
+          bi->objects[soid] = i->version;
+        } else if (i->is_delete()) {
+          dout(10) << __func__ << ": " << i->soid << " removed" << dendl;
+          bi->objects.erase(soid);
+        }
+      }
+    }
+    // Record that the interval now reflects the head of the log; otherwise
+    // every later call would replay the same entries again.
+    bi->version = info.last_update;
+  } else {
+    dout(10) << __func__<< ": bi is old, rescanning local backfill_info"
+             << dendl;
+    const int local_min = osd->store->get_ideal_list_min();
+    const int local_max = osd->store->get_ideal_list_max();
+    // Operate on the interval we were handed, not on a specific member.
+    bi->clear();
+    osr->flush();
+    scan_range(backfill_pos, local_min, local_max, bi, handle);
+  }
}
void ReplicatedPG::scan_range(
@@ -8134,6 +8181,7 @@ void ReplicatedPG::scan_range(
{
assert(is_locked());
dout(10) << "scan_range from " << begin << dendl;
+ bi->version = info.last_update;
bi->begin = begin;
bi->objects.clear(); // for good measure
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index a4e5ac1e1a1..5105207694b 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -624,6 +624,12 @@ protected:
ThreadPool::TPHandle &handle
);
+ /// Update a hash range to reflect changes since the last scan
+ void update_range(
+ BackfillInterval *bi, ///< [in,out] interval to update
+ ThreadPool::TPHandle &handle ///< [in] tp handle
+ );
+
void prep_backfill_object_push(
hobject_t oid, eversion_t v, eversion_t have, int peer,
PGBackend::RecoveryHandle *h);