summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Samuel Just <sam.just@inktank.com> 2013-07-22 16:00:07 -0700
committer Samuel Just <sam.just@inktank.com> 2013-07-25 20:37:49 -0700
commit 6951d2345a5d837c3b14103bd4d8f5ee4407c937 (patch)
tree 99b053230259852ef8979b091f2a3dd5a506b8db
parent fbf74d987d22dcca832a8afd358e0d76d4442f4c (diff)
download ceph-6951d2345a5d837c3b14103bd4d8f5ee4407c937.tar.gz
OSD: tolerate holes in stored maps
We may have holes in stored maps during init_splits_between and advance_pg. In either case, we should simply skip the missing maps.

Fixes: #5677
Signed-off-by: Samuel Just <sam.just@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r-- src/osd/OSD.cc 20
-rw-r--r-- src/osd/OSD.h 7
-rw-r--r-- src/osd/PG.cc 1
-rw-r--r-- src/osd/osd_types.h 7
4 files changed, 27 insertions, 8 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 4be586f3d15..bc3aa604fec 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -291,11 +291,13 @@ void OSDService::init_splits_between(pg_t pgid,
// Ok, a split happened, so we need to walk the osdmaps
set<pg_t> new_pgs; // pgs to scan on each map
new_pgs.insert(pgid);
+ OSDMapRef curmap(get_map(frommap->get_epoch()));
for (epoch_t e = frommap->get_epoch() + 1;
e <= tomap->get_epoch();
++e) {
- OSDMapRef curmap(get_map(e-1));
- OSDMapRef nextmap(get_map(e));
+ OSDMapRef nextmap(try_get_map(e));
+ if (!nextmap)
+ continue;
set<pg_t> even_newer_pgs; // pgs added in this loop
for (set<pg_t>::iterator i = new_pgs.begin(); i != new_pgs.end(); ++i) {
set<pg_t> split_pgs;
@@ -307,7 +309,9 @@ void OSDService::init_splits_between(pg_t pgid,
}
}
new_pgs.insert(even_newer_pgs.begin(), even_newer_pgs.end());
+ curmap = nextmap;
}
+ assert(curmap == tomap); // we must have had both frommap and tomap
}
}
@@ -5177,7 +5181,9 @@ void OSD::advance_pg(
for (;
next_epoch <= osd_epoch;
++next_epoch) {
- OSDMapRef nextmap = get_map(next_epoch);
+ OSDMapRef nextmap = service.try_get_map(next_epoch);
+ if (!nextmap)
+ continue;
vector<int> newup, newacting;
nextmap->pg_to_up_acting_osds(pg->info.pgid, newup, newacting);
@@ -5511,7 +5517,7 @@ OSDMapRef OSDService::_add_map(OSDMap *o)
return l;
}
-OSDMapRef OSDService::get_map(epoch_t epoch)
+OSDMapRef OSDService::try_get_map(epoch_t epoch)
{
Mutex::Locker l(map_cache_lock);
OSDMapRef retval = map_cache.lookup(epoch);
@@ -5524,8 +5530,10 @@ OSDMapRef OSDService::get_map(epoch_t epoch)
if (epoch > 0) {
dout(20) << "get_map " << epoch << " - loading and decoding " << map << dendl;
bufferlist bl;
- bool ok = _get_map_bl(epoch, bl);
- assert(ok);
+ if (!_get_map_bl(epoch, bl)) {
+ delete map;
+ return OSDMapRef();
+ }
map->decode(bl);
} else {
dout(20) << "get_map " << epoch << " - return initial " << map << dendl;
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 04ad4dcd7d7..f9ceaf81bf3 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -441,7 +441,12 @@ public:
SimpleLRU<epoch_t, bufferlist> map_bl_cache;
SimpleLRU<epoch_t, bufferlist> map_bl_inc_cache;
- OSDMapRef get_map(epoch_t e);
+ OSDMapRef try_get_map(epoch_t e);
+ OSDMapRef get_map(epoch_t e) { // asserting wrapper around try_get_map(): the map for epoch e must exist
+ OSDMapRef ret(try_get_map(e));
+ assert(ret); // try_get_map() hands back an empty OSDMapRef when the stored map for e cannot be read
+ return ret;
+ }
OSDMapRef add_map(OSDMap *o) {
Mutex::Locker l(map_cache_lock);
return _add_map(o);
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 7373357db11..9f957b8e054 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -5032,7 +5032,6 @@ void PG::handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap,
vector<int>& newup, vector<int>& newacting,
RecoveryCtx *rctx)
{
- assert(osdmap->get_epoch() == (lastmap->get_epoch() + 1));
assert(lastmap->get_epoch() == osdmap_ref->get_epoch());
assert(lastmap == osdmap_ref);
dout(10) << "handle_advance_map " << newup << "/" << newacting << dendl;
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 3a6db4d8315..ca3dcc192b0 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -1141,6 +1141,13 @@ struct pg_history_t {
epoch_t last_epoch_clean; // lower bound on last epoch the PG was completely clean.
epoch_t last_epoch_split; // as parent
+ /**
+ * In the event of a map discontinuity, same_*_since may reflect the first
+ * map the osd has seen in the new map sequence rather than the actual start
+ * of the interval. This is ok since a discontinuity at epoch e means there
+ * must have been a clean interval between e and now and that we cannot be
+ * in the active set during the interval containing e.
+ */
epoch_t same_up_since; // same acting set since
epoch_t same_interval_since; // same acting AND up set since
epoch_t same_primary_since; // same primary at least back through this epoch.