summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLoic Dachary <loic@dachary.org>2013-07-28 13:50:43 +0200
committerSamuel Just <sam.just@inktank.com>2013-07-29 12:49:25 -0700
commitee9f04c5f8674bc2e8e6c5d92e9193d904215cb5 (patch)
treea75fec7289f9ff2a526e6ac702254d767af8bac8
parent1f13d8ac5b879134942cac2f5aca00669f24581f (diff)
downloadceph-ee9f04c5f8674bc2e8e6c5d92e9193d904215cb5.tar.gz
check_new_interval must compare old acting with old osdmap
When trying to establish if the old acting set is either empty or smaller than the min_size of the osdmap, pg_interval_t::check_new_interval compares with the min_size of the new osdmap. Since the goal is to try to determine if the previous interval may have been writeable, it should not enter the if when there were not enough osds in the acting set ( i.e. < min_size ). But it may enter it anyway if min_size was decremented in the new osdmap. A complete set of unit tests were added to cover the logic of check_new_interval. The parameters are prepared to describe a situation where the function returns false (i.e. no new interval). Each case is described in a separate bloc that introduces the minimal changes to demonstrate the intended test case. Because a number of cases have the same output while implementing a different logic, the debug output is parsed to differentiate between them. A test case demonstrating the problem ( check_new_interval must compare old acting with old osdmap ) is added, with a link to the bug number for future reference. The problem is fixed. The text of two debug messages are slightly changed to make the maintenance of the test that match them easier. http://tracker.ceph.com/issues/5780 refs #5780 Signed-off-by: Loic Dachary <loic@dachary.org> Reviewed-by: Sage Weil <sage@inktank.com> Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r--src/osd/osd_types.cc5
-rw-r--r--src/test/test_osd_types.cc416
2 files changed, 419 insertions, 2 deletions
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 02c1ef7b69d..fbd5cbbe9a0 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -1715,7 +1715,7 @@ bool pg_interval_t::check_new_interval(
if (!i.acting.empty() &&
i.acting.size() >=
- osdmap->get_pools().find(pool_id)->second.min_size) {
+ lastmap->get_pools().find(pool_id)->second.min_size) {
if (out)
*out << "generate_past_intervals " << i
<< ": not rw,"
@@ -1730,6 +1730,7 @@ bool pg_interval_t::check_new_interval(
*out << "generate_past_intervals " << i
<< " : primary up " << lastmap->get_up_from(i.acting[0])
<< "-" << lastmap->get_up_thru(i.acting[0])
+ << " includes interval"
<< std::endl;
} else if (last_epoch_clean >= i.first &&
last_epoch_clean <= i.last) {
@@ -1758,7 +1759,7 @@ bool pg_interval_t::check_new_interval(
} else {
i.maybe_went_rw = false;
if (out)
- *out << "generate_past_intervals " << i << " : empty" << std::endl;
+ *out << "generate_past_intervals " << i << " : acting set is too small" << std::endl;
}
return true;
} else {
diff --git a/src/test/test_osd_types.cc b/src/test/test_osd_types.cc
index 7a43b7b892a..fa4ae6163ac 100644
--- a/src/test/test_osd_types.cc
+++ b/src/test/test_osd_types.cc
@@ -17,6 +17,7 @@
#include "include/types.h"
#include "osd/osd_types.h"
+#include "osd/OSDMap.h"
#include "gtest/gtest.h"
#include "common/Thread.h"
@@ -117,6 +118,421 @@ TEST(hobject, prefixes5)
ASSERT_EQ(prefixes_out, prefixes_correct);
}
+TEST(pg_interval_t, check_new_interval)
+{
+ //
+ // Create a situation where osdmaps are the same so that
+ // each test case can diverge from it using minimal code.
+ //
+ int osd_id = 1;
+ epoch_t epoch = 40;
+ std::tr1::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ std::tr1::shared_ptr<OSDMap> lastmap(new OSDMap());
+ lastmap->set_max_osd(10);
+ lastmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ lastmap->set_epoch(epoch);
+ epoch_t same_interval_since = epoch;
+ epoch_t last_epoch_clean = same_interval_since;
+ int64_t pool_id = 200;
+ int pg_num = 4;
+ __u8 min_size = 2;
+ {
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ inc.new_up_thru[osd_id] = epoch + 1;
+ osdmap->apply_incremental(inc);
+ lastmap->apply_incremental(inc);
+ }
+ vector<int> new_acting;
+ new_acting.push_back(osd_id);
+ new_acting.push_back(osd_id + 1);
+ vector<int> old_acting = new_acting;
+ vector<int> new_up;
+ new_up.push_back(osd_id);
+ vector<int> old_up = new_up;
+ pg_t pgid;
+ pgid.set_pool(pool_id);
+
+ //
+ // Do nothing if there are no modifications in
+ // acting, up or pool size and that the pool is not
+ // being split
+ //
+ {
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_FALSE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals));
+ ASSERT_TRUE(past_intervals.empty());
+ }
+
+ //
+ // pool did not exist in the old osdmap
+ //
+ {
+ std::tr1::shared_ptr<OSDMap> lastmap(new OSDMap());
+ lastmap->set_max_osd(10);
+ lastmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ lastmap->set_epoch(epoch);
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
+ ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]);
+ }
+
+ //
+ // The acting set has changed
+ //
+ {
+ vector<int> new_acting;
+ int new_primary = osd_id + 1;
+ new_acting.push_back(new_primary);
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
+ ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]);
+ }
+
+ //
+ // The up set has changed
+ //
+ {
+ vector<int> new_up;
+ int new_primary = osd_id + 1;
+ new_up.push_back(new_primary);
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
+ ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]);
+ }
+
+ //
+ // PG is splitting
+ //
+ {
+ std::tr1::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ int new_pg_num = pg_num ^ 2;
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(new_pg_num);
+ osdmap->apply_incremental(inc);
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
+ ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]);
+ }
+
+ //
+ // PG size has changed
+ //
+ {
+ std::tr1::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ __u8 new_min_size = min_size + 1;
+ inc.new_pools[pool_id].min_size = new_min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ osdmap->apply_incremental(inc);
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
+ ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]);
+ ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]);
+ }
+
+ //
+ // The old acting set was empty : the previous interval could not
+ // have been rw
+ //
+ {
+ vector<int> old_acting;
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ostringstream out;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals,
+ &out));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_FALSE(past_intervals[same_interval_since].maybe_went_rw);
+ ASSERT_NE(string::npos, out.str().find("acting set is too small"));
+ }
+
+ //
+ // The old acting set did not have enough osd : it could
+ // not have been rw
+ //
+ {
+ vector<int> old_acting;
+ old_acting.push_back(osd_id);
+
+ //
+ // see http://tracker.ceph.com/issues/5780
+ // the size of the old acting set should be compared
+ // with the min_size of the old osdmap
+ //
+ // The new osdmap is created so that it triggers the
+ // bug.
+ //
+ std::tr1::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ __u8 new_min_size = old_acting.size();
+ inc.new_pools[pool_id].min_size = new_min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ osdmap->apply_incremental(inc);
+
+ ostringstream out;
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals,
+ &out));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_FALSE(past_intervals[same_interval_since].maybe_went_rw);
+ ASSERT_NE(string::npos, out.str().find("acting set is too small"));
+ }
+
+ //
+ // The acting set changes. The old acting set primary was up during the
+ // previous interval and may have been rw.
+ //
+ {
+ vector<int> new_acting;
+ new_acting.push_back(osd_id + 4);
+ new_acting.push_back(osd_id + 5);
+
+ ostringstream out;
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals,
+ &out));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_TRUE(past_intervals[same_interval_since].maybe_went_rw);
+ ASSERT_NE(string::npos, out.str().find("includes interval"));
+ }
+ //
+ // The acting set changes. The old acting set primary was not up
+ // during the old interval but last_epoch_clean is in the
+ // old interval and it may have been rw.
+ //
+ {
+ vector<int> new_acting;
+ new_acting.push_back(osd_id + 4);
+ new_acting.push_back(osd_id + 5);
+
+ std::tr1::shared_ptr<OSDMap> lastmap(new OSDMap());
+ lastmap->set_max_osd(10);
+ lastmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ lastmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ inc.new_up_thru[osd_id] = epoch - 10;
+ lastmap->apply_incremental(inc);
+
+ ostringstream out;
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals,
+ &out));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_TRUE(past_intervals[same_interval_since].maybe_went_rw);
+ ASSERT_NE(string::npos, out.str().find("presumed to have been rw"));
+ }
+
+ //
+ // The acting set changes. The old acting set primary was not up
+ // during the old interval and last_epoch_clean is before the
+ // old interval : the previous interval could not possibly have
+ // been rw.
+ //
+ {
+ vector<int> new_acting;
+ new_acting.push_back(osd_id + 4);
+ new_acting.push_back(osd_id + 5);
+
+ epoch_t last_epoch_clean = epoch - 10;
+
+ std::tr1::shared_ptr<OSDMap> lastmap(new OSDMap());
+ lastmap->set_max_osd(10);
+ lastmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ lastmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ inc.new_up_thru[osd_id] = last_epoch_clean;
+ lastmap->apply_incremental(inc);
+
+ ostringstream out;
+
+ map<epoch_t, pg_interval_t> past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+ new_acting,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pool_id,
+ pgid,
+ &past_intervals,
+ &out));
+ ASSERT_EQ((unsigned int)1, past_intervals.size());
+ ASSERT_FALSE(past_intervals[same_interval_since].maybe_went_rw);
+ ASSERT_NE(string::npos, out.str().find("does not include interval"));
+ }
+}
+
TEST(pg_t, split)
{
pg_t pgid(0, 0, -1);