author    John Wilkins <john.wilkins@inktank.com>    2013-01-28 17:51:20 -0800
committer John Wilkins <john.wilkins@inktank.com>    2013-01-28 17:51:20 -0800
commit    e58fe519800877f69628046d83d948d989e6ad5f (patch)
tree      ba8e1eb68dd4416fa137f5a588ed01681ba63f4e
parent    b429a3a3bbc8b95b71b57158d2bb6bc1fbe37045 (diff)
parent    c79f7c6c037bd5653db5342aff7443d99a9c6acd (diff)
Merge branch 'master' of https://github.com/ceph/ceph
-rw-r--r--  doc/man/8/ceph.rst                |   2
-rw-r--r--  doc/man/8/rados.rst               |   2
-rw-r--r--  doc/rados/operations/control.rst  |   4
-rw-r--r--  doc/rados/operations/pools.rst    |   2
-rw-r--r--  man/ceph.8                        |   4
-rw-r--r--  man/rados.8                       |   2
-rwxr-xr-x  qa/workunits/mon/pool_ops.sh      |  22
-rwxr-xr-x  qa/workunits/rbd/copy.sh          |  21
-rwxr-xr-x  qa/workunits/rbd/permissions.sh   |   4
-rw-r--r--  src/common/config_opts.h          |   1
-rw-r--r--  src/crush/CrushWrapper.cc         |  37
-rw-r--r--  src/crush/CrushWrapper.h          |   8
-rw-r--r--  src/mon/Elector.cc                |   2
-rw-r--r--  src/mon/Monitor.cc                | 116
-rw-r--r--  src/mon/Monitor.h                 |   8
-rw-r--r--  src/mon/OSDMonitor.cc             |  53
-rw-r--r--  src/mon/OSDMonitor.h              |   3
-rw-r--r--  src/osd/OSDMap.cc                 |  62
-rw-r--r--  src/osd/OSDMap.h                  |   6
-rw-r--r--  src/rados.cc                      |   5
-rw-r--r--  src/rgw/rgw_rest_s3.cc            |   2
-rw-r--r--  src/test/cli/ceph/help.t          |   2
-rw-r--r--  src/tools/ceph.cc                 |   2
23 files changed, 278 insertions(+), 92 deletions(-)
diff --git a/doc/man/8/ceph.rst b/doc/man/8/ceph.rst
index 04b51e609f7..634c82c433e 100644
--- a/doc/man/8/ceph.rst
+++ b/doc/man/8/ceph.rst
@@ -79,7 +79,7 @@ Monitor commands
================
A more complete summary of commands understood by the monitor cluster can be found in the
-wiki, at
+online documentation, at
http://ceph.com/docs/master/rados/operations/control
diff --git a/doc/man/8/rados.rst b/doc/man/8/rados.rst
index f6fde9b91a6..e12f2da646a 100644
--- a/doc/man/8/rados.rst
+++ b/doc/man/8/rados.rst
@@ -65,7 +65,7 @@ Global commands
:command:`mkpool` *foo*
Create a pool with name foo.
-:command:`rmpool` *foo*
+:command:`rmpool` *foo* [ *foo* --yes-i-really-really-mean-it ]
Delete the pool foo (and all its data)
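
As a quick sketch of the new interlock from the rados CLI (the pool name
testpool is made up for illustration):

    # create a throwaway pool, then remove it; rmpool now demands the
    # name twice plus the confirmation flag before destroying data
    rados mkpool testpool
    rados rmpool testpool testpool --yes-i-really-really-mean-it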
diff --git a/doc/rados/operations/control.rst b/doc/rados/operations/control.rst
index db5b8008675..e956c4d9334 100644
--- a/doc/rados/operations/control.rst
+++ b/doc/rados/operations/control.rst
@@ -237,8 +237,8 @@ Creates/deletes a snapshot of a pool. ::
Creates/deletes/renames a storage pool. ::
ceph osd pool create {pool-name} pg_num [pgp_num]
- ceph osd pool delete {pool-name}
- ceph osd pool rename {old-name} {new-name}
+ ceph osd pool delete {pool-name} [{pool-name} --yes-i-really-really-mean-it]
+ ceph osd pool rename {old-name} {new-name}
Changes a pool setting. ::
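
A hedged end-to-end sketch of the three commands above; the pool names and
PG counts are arbitrary:

    ceph osd pool create mypool 128 128
    ceph osd pool rename mypool newpool
    ceph osd pool delete newpool newpool --yes-i-really-really-mean-it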
diff --git a/doc/rados/operations/pools.rst b/doc/rados/operations/pools.rst
index a13fa91bcd8..3b492f3862e 100644
--- a/doc/rados/operations/pools.rst
+++ b/doc/rados/operations/pools.rst
@@ -106,7 +106,7 @@ Delete a Pool
To delete a pool, execute::
- ceph osd pool delete {pool-name}
+ ceph osd pool delete {pool-name} [{pool-name} --yes-i-really-really-mean-it]
If you created your own rulesets and rules for a pool you created, you should
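
In practice the interlock looks roughly like this (pool name hypothetical;
the refusal text comes from the OSDMonitor.cc hunk further down):

    ceph osd pool delete mypool
    # refused with -EPERM: "WARNING: this will *PERMANENTLY DESTROY* all
    # data stored in pool mypool. ..."
    ceph osd pool delete mypool mypool --yes-i-really-really-mean-it
    # -> pool 'mypool' deleted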
diff --git a/man/ceph.8 b/man/ceph.8
index 483eb969d8f..b08395423ae 100644
--- a/man/ceph.8
+++ b/man/ceph.8
@@ -104,10 +104,10 @@ ceph pg dump \-o pg.txt
.SH MONITOR COMMANDS
.sp
A more complete summary of commands understood by the monitor cluster can be found in the
-wiki, at
+online documentation, at
.INDENT 0.0
.INDENT 3.5
-\fI\%http://ceph.com/docs/master/cluster-ops/control\fP
+\fI\%http://ceph.com/docs/master/rados/operations/control\fP
.UNINDENT
.UNINDENT
.SH AVAILABILITY
diff --git a/man/rados.8 b/man/rados.8
index 046ffbf433d..ef65f651e98 100644
--- a/man/rados.8
+++ b/man/rados.8
@@ -90,7 +90,7 @@ counts, over the entire system and broken down by pool.
.B \fBmkpool\fP \fIfoo\fP
Create a pool with name foo.
.TP
-.B \fBrmpool\fP \fIfoo\fP
+.B \fBrmpool\fP \fIfoo\fP [ \fIfoo\fP \-\-yes\-i\-really\-really\-mean\-it ]
Delete the pool foo (and all its data)
.UNINDENT
.SH POOL SPECIFIC COMMANDS
diff --git a/qa/workunits/mon/pool_ops.sh b/qa/workunits/mon/pool_ops.sh
index 0fed24d7474..2d346cc137f 100755
--- a/qa/workunits/mon/pool_ops.sh
+++ b/qa/workunits/mon/pool_ops.sh
@@ -7,11 +7,25 @@ ceph osd pool create fooo 123
ceph osd pool create foo 123 # idempotent
-ceph osd pool delete foo
-ceph osd pool delete foo
-ceph osd pool delete fuggg
+# should fail due to safety interlock
+! ceph osd pool delete foo
+! ceph osd pool delete foo foo
+! ceph osd pool delete foo foo --force
+! ceph osd pool delete foo fooo --yes-i-really-mean-it
+! ceph osd pool delete foo --yes-i-really-mean-it foo
+! ceph osd pool delete --yes-i-really-mean-it foo foo
-ceph osd pool delete fooo
+
+ceph osd pool delete fooo fooo --yes-i-really-really-mean-it
+ceph osd pool delete foo foo --yes-i-really-really-mean-it
+
+# idempotent
+ceph osd pool delete foo foo --yes-i-really-really-mean-it
+ceph osd pool delete fooo fooo --yes-i-really-really-mean-it
+ceph osd pool delete fooo fooo --yes-i-really-really-mean-it
+
+# non-existent pool
+! ceph osd pool delete fuggg fuggg --yes-i-really-really-mean-it
echo OK
diff --git a/qa/workunits/rbd/copy.sh b/qa/workunits/rbd/copy.sh
index cd0bea79c2c..a3334c073d6 100755
--- a/qa/workunits/rbd/copy.sh
+++ b/qa/workunits/rbd/copy.sh
@@ -1,5 +1,8 @@
#!/bin/sh -ex
+# make sure rbd pool is EMPTY.. this is a test script!!
+rbd ls | wc -l | grep -v '^0$' && echo "nonempty rbd pool, aborting! run this script on an empty test cluster only." && exit 1
+
IMGS="testimg1 testimg2 testimg3 foo foo2 bar bar2 test1 test2 test3"
remove_images() {
@@ -90,7 +93,7 @@ test_rename() {
! rbd rename rbd2/bar --dest-pool rbd foo
rbd rename --pool rbd2 bar --dest-pool rbd2 foo
rbd -p rbd2 ls | grep foo
- rados rmpool rbd2
+ rados rmpool rbd2 rbd2 --yes-i-really-really-mean-it
remove_images
}
@@ -142,7 +145,7 @@ test_ls() {
done
for i in $(seq -w 00 99); do
- rbd create image.$i --format 2 -s 1
+ rbd create image.$i --image-format 2 -s 1
done
rbd ls | wc -l | grep 100
rbd ls -l | grep image | wc -l | grep 100
@@ -234,7 +237,7 @@ test_pool_image_args() {
echo "testing pool and image args..."
remove_images
- ceph osd pool delete test || true
+ ceph osd pool delete test test --yes-i-really-really-mean-it || true
ceph osd pool create test 100
truncate -s 1 /tmp/empty
@@ -283,8 +286,8 @@ test_pool_image_args() {
rbd ls test | grep -qv test12
rm -f /tmp/empty
- ceph osd pool delete test
- ceph osd pool delete rbd
+ ceph osd pool delete test test --yes-i-really-really-mean-it
+ ceph osd pool delete rbd rbd --yes-i-really-really-mean-it
ceph osd pool create rbd 100
}
@@ -307,9 +310,9 @@ test_clone() {
rbd ls -l | grep clone2 | grep rbd2/clone@s1
rbd -p rbd2 ls | grep -v clone2
- rados rmpool rbd2
- rados rmpool rbd
- rados mkpool rbd
+ rados rmpool rbd2 rbd2 --yes-i-really-really-mean-it
+ rados rmpool rbd rbd --yes-i-really-really-mean-it
+    rados mkpool rbd
}
test_pool_image_args
@@ -319,7 +322,7 @@ test_remove
RBD_CREATE_ARGS=""
test_others
test_locking
-RBD_CREATE_ARGS="--format 2"
+RBD_CREATE_ARGS="--image-format 2"
test_others
test_locking
test_clone
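
Note the --format to --image-format rename in the hunks above; a minimal
before/after sketch (image name and size are arbitrary):

    # old spelling, removed above:
    #   rbd create image.00 --format 2 -s 1
    # new spelling:
    rbd create image.00 --image-format 2 -s 1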
diff --git a/qa/workunits/rbd/permissions.sh b/qa/workunits/rbd/permissions.sh
index 40428df38e3..74c24c03c2b 100755
--- a/qa/workunits/rbd/permissions.sh
+++ b/qa/workunits/rbd/permissions.sh
@@ -6,8 +6,8 @@ create_pools() {
}
delete_pools() {
- (ceph osd pool delete images || true) >/dev/null 2>&1
- (ceph osd pool delete volumes || true) >/dev/null 2>&1
+ (ceph osd pool delete images images --yes-i-really-really-mean-it || true) >/dev/null 2>&1
+ (ceph osd pool delete volumes volumes --yes-i-really-really-mean-it || true) >/dev/null 2>&1
}
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 59caca5a6a2..a778268d51a 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -127,6 +127,7 @@ OPTION(mon_osd_auto_mark_in, OPT_BOOL, false) // mark any booting osds '
OPTION(mon_osd_auto_mark_auto_out_in, OPT_BOOL, true) // mark booting auto-marked-out osds 'in'
OPTION(mon_osd_auto_mark_new_in, OPT_BOOL, true) // mark booting new osds 'in'
OPTION(mon_osd_down_out_interval, OPT_INT, 300) // seconds
+OPTION(mon_osd_down_out_subtree_limit, OPT_STR, "rack") // largest crush unit/type that we will automatically mark out
OPTION(mon_osd_min_up_ratio, OPT_DOUBLE, .3) // min osds required to be up to mark things down
OPTION(mon_osd_min_in_ratio, OPT_DOUBLE, .3) // min osds required to be in to mark things out
OPTION(mon_lease, OPT_FLOAT, 5) // lease interval
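
A hedged ceph.conf sketch for the new option; "row" assumes such a type
exists in the local CRUSH hierarchy (the default is "rack", and an empty
string disables the check entirely, per the OSDMonitor.cc hunk below):

    [mon]
        ; if every OSD in a containing CRUSH subtree of this type (or a
        ; larger one) is down, do not auto-mark those OSDs out; leave
        ; that call to an administrator
        mon osd down out subtree limit = row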
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 3bae96c8689..45e4fb53de6 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -202,6 +202,23 @@ map<int, string> CrushWrapper::get_parent_hierarchy(int id)
return parent_hierarchy;
}
+int CrushWrapper::get_children(int id, list<int> *children)
+{
+ // leaf?
+ if (id >= 0) {
+ return 0;
+ }
+
+ crush_bucket *b = get_bucket(id);
+ if (!b) {
+ return -ENOENT;
+ }
+
+ for (unsigned n=0; n<b->size; n++) {
+ children->push_back(b->items[n]);
+ }
+ return b->size;
+}
int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string name,
@@ -426,24 +443,36 @@ pair<string,string> CrushWrapper::get_immediate_parent(int id)
{
pair <string, string> loc;
-
for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
crush_bucket *b = crush->buckets[bidx];
if (b == 0)
continue;
for (unsigned i = 0; i < b->size; i++)
- if (b->items[i] == id){
+ if (b->items[i] == id) {
string parent_id = name_map[b->id];
string parent_bucket_type = type_map[b->type];
loc = make_pair(parent_bucket_type, parent_id);
}
}
-
return loc;
}
-
+int CrushWrapper::get_immediate_parent_id(int id, int *parent)
+{
+ for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
+ crush_bucket *b = crush->buckets[bidx];
+ if (b == 0)
+ continue;
+ for (unsigned i = 0; i < b->size; i++) {
+ if (b->items[i] == id) {
+ *parent = b->id;
+ return 0;
+ }
+ }
+ }
+ return -ENOENT;
+}
void CrushWrapper::reweight(CephContext *cct)
{
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index 56bcb598ff3..7def6e4ab34 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -284,6 +284,7 @@ public:
* returns the (type, name) of the parent bucket of id
*/
pair<string,string> get_immediate_parent(int id);
+ int get_immediate_parent_id(int id, int *parent);
/**
* get the fully qualified location of a device by successively finding
@@ -302,6 +303,13 @@ public:
*/
map<int, string> get_parent_hierarchy(int id);
+ /**
+ * enumerate immediate children of given node
+ *
+ * @param id parent bucket or device id
+ * @return number of items, or error
+ */
+ int get_children(int id, list<int> *children);
/**
* insert an item into the map at a specific position
diff --git a/src/mon/Elector.cc b/src/mon/Elector.cc
index e2ffa6bd571..199eaeae538 100644
--- a/src/mon/Elector.cc
+++ b/src/mon/Elector.cc
@@ -271,6 +271,8 @@ void Elector::handle_victory(MMonElection *m)
assert(from < mon->rank);
assert(m->epoch % 2 == 0);
+ leader_acked = -1;
+
// i should have seen this election if i'm getting the victory.
if (m->epoch != epoch + 1) {
dout(5) << "woah, that's a funny epoch, i must have rebooted. bumping and re-starting!" << dendl;
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 143ee65ed97..699db8968f1 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -736,7 +736,7 @@ void Monitor::reset()
{
dout(10) << "reset" << dendl;
- timecheck_cleanup();
+ timecheck_finish();
leader_since = utime_t();
if (!quorum.empty()) {
@@ -1189,7 +1189,7 @@ void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features)
finish_election();
if (monmap->size() > 1)
- timecheck();
+ timecheck_start();
}
void Monitor::lose_election(epoch_t epoch, set<int> &q, int l, uint64_t features)
@@ -1213,6 +1213,7 @@ void Monitor::lose_election(epoch_t epoch, set<int> &q, int l, uint64_t features
void Monitor::finish_election()
{
+ timecheck_finish();
exited_quorum = utime_t();
finish_contexts(g_ceph_context, waitfor_quorum);
finish_contexts(g_ceph_context, maybe_wait_for_quorum);
@@ -2240,18 +2241,98 @@ bool Monitor::_ms_dispatch(Message *m)
return ret;
}
+void Monitor::timecheck_start()
+{
+ dout(10) << __func__ << dendl;
+ timecheck_cleanup();
+ timecheck_start_round();
+}
+
+void Monitor::timecheck_finish()
+{
+ dout(10) << __func__ << dendl;
+ timecheck_cleanup();
+}
+
+void Monitor::timecheck_start_round()
+{
+ dout(10) << __func__ << " curr " << timecheck_round << dendl;
+ assert(is_leader());
+
+ if (monmap->size() == 1) {
+ assert(0 == "We are alone; this shouldn't have been scheduled!");
+ return;
+ }
+
+ if (timecheck_round % 2) {
+ dout(10) << __func__ << " there's a timecheck going on" << dendl;
+ utime_t curr_time = ceph_clock_now(g_ceph_context);
+ double max = g_conf->mon_timecheck_interval*3;
+ if (curr_time - timecheck_round_start > max) {
+ dout(10) << __func__ << " keep current round going" << dendl;
+ goto out;
+ } else {
+ dout(10) << __func__
+ << " finish current timecheck and start new" << dendl;
+ timecheck_cancel_round();
+ }
+ }
+
+ assert(timecheck_round % 2 == 0);
+ timecheck_acks = 0;
+ timecheck_round ++;
+ timecheck_round_start = ceph_clock_now(g_ceph_context);
+ dout(10) << __func__ << " new " << timecheck_round << dendl;
+
+ timecheck();
+out:
+ dout(10) << __func__ << " setting up next event" << dendl;
+ timecheck_event = new C_TimeCheck(this);
+ timer.add_event_after(g_conf->mon_timecheck_interval, timecheck_event);
+}
+
+void Monitor::timecheck_finish_round(bool success)
+{
+ dout(10) << __func__ << " curr " << timecheck_round << dendl;
+ assert(timecheck_round % 2);
+ timecheck_round ++;
+ timecheck_round_start = utime_t();
+
+ if (success) {
+ assert(timecheck_waiting.size() == 0);
+ assert(timecheck_acks == quorum.size());
+ timecheck_report();
+ return;
+ }
+
+ dout(10) << __func__ << " " << timecheck_waiting.size()
+ << " peers still waiting:";
+ for (map<entity_inst_t,utime_t>::iterator p = timecheck_waiting.begin();
+ p != timecheck_waiting.end(); ++p) {
+ *_dout << " " << p->first.name;
+ }
+ *_dout << dendl;
+ timecheck_waiting.clear();
+
+ dout(10) << __func__ << " finished to " << timecheck_round << dendl;
+}
+
+void Monitor::timecheck_cancel_round()
+{
+ timecheck_finish_round(false);
+}
+
void Monitor::timecheck_cleanup()
{
timecheck_round = 0;
timecheck_acks = 0;
+ timecheck_round_start = utime_t();
if (timecheck_event) {
timer.cancel_event(timecheck_event);
timecheck_event = NULL;
}
-
- if (timecheck_waiting.size() > 0)
- timecheck_waiting.clear();
+ timecheck_waiting.clear();
timecheck_skews.clear();
timecheck_latencies.clear();
}
@@ -2300,20 +2381,12 @@ void Monitor::timecheck()
{
dout(10) << __func__ << dendl;
assert(is_leader());
-
if (monmap->size() == 1) {
- assert(0 == "We are alone; this shouldn't have been scheduled!");
+ assert(0 == "We are alone; we shouldn't have gotten here!");
return;
}
+ assert(timecheck_round % 2 != 0);
- if ((timecheck_round % 2) != 0) {
- dout(15) << __func__
- << " timecheck still in progress; laggy monitors maybe?"
- << dendl;
- goto out;
- }
-
- timecheck_round++;
timecheck_acks = 1; // we ack ourselves
dout(10) << __func__ << " start timecheck epoch " << get_epoch()
@@ -2336,12 +2409,6 @@ void Monitor::timecheck()
dout(10) << __func__ << " send " << *m << " to " << inst << dendl;
messenger->send_message(m, inst);
}
-
-out:
- dout(10) << __func__ << " setting up next event and timeout" << dendl;
- timecheck_event = new C_TimeCheck(this);
-
- timer.add_event_after(g_conf->mon_timecheck_interval, timecheck_event);
}
health_status_t Monitor::timecheck_status(ostringstream &ss,
@@ -2394,9 +2461,7 @@ void Monitor::handle_timecheck_leader(MTimeCheck *m)
dout(1) << __func__ << " our clock was readjusted --"
<< " bump round and drop current check"
<< dendl;
- timecheck_round++;
- timecheck_acks = 0;
- timecheck_waiting.clear();
+ timecheck_cancel_round();
return;
}
@@ -2481,8 +2546,7 @@ void Monitor::handle_timecheck_leader(MTimeCheck *m)
assert(timecheck_skews.size() == timecheck_acks);
assert(timecheck_waiting.size() == 0);
// everyone has acked, so bump the round to finish it.
- timecheck_round++;
- timecheck_report();
+ timecheck_finish_round();
}
}
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index 9716e351348..c7704bb16da 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -238,6 +238,7 @@ private:
// finished.
version_t timecheck_round;
unsigned int timecheck_acks;
+ utime_t timecheck_round_start;
/**
* Time Check event.
*/
@@ -247,10 +248,15 @@ private:
Monitor *mon;
C_TimeCheck(Monitor *m) : mon(m) { }
void finish(int r) {
- mon->timecheck();
+ mon->timecheck_start_round();
}
};
+ void timecheck_start();
+ void timecheck_finish();
+ void timecheck_start_round();
+ void timecheck_finish_round(bool success = true);
+ void timecheck_cancel_round();
void timecheck_cleanup();
void timecheck_report();
void timecheck();
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 96e2aa12ea7..3d11cfffc0f 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -61,7 +61,6 @@ static ostream& _prefix(std::ostream *_dout, Monitor *mon, OSDMap& osdmap) {
/************ MAPS ****************/
OSDMonitor::OSDMonitor(Monitor *mn, Paxos *p)
: PaxosService(mn, p),
- delete_pool_nonce(0),
thrash_map(0), thrash_last_up_osd(-1)
{
// we need to trim this too
@@ -1458,6 +1457,8 @@ void OSDMonitor::tick()
* ratio set by g_conf->mon_osd_min_in_ratio. So it's not really up to us.
*/
if (can_mark_out(-1)) {
+ set<int> down_cache; // quick cache of down subtrees
+
map<int,utime_t>::iterator i = down_pending_out.begin();
while (i != down_pending_out.end()) {
int o = i->first;
@@ -1484,6 +1485,20 @@ void OSDMonitor::tick()
grace += my_grace;
}
+ // is this an entire large subtree down?
+ if (g_conf->mon_osd_down_out_subtree_limit.length()) {
+ int type = osdmap.crush->get_type_id(g_conf->mon_osd_down_out_subtree_limit.c_str());
+ if (type > 0) {
+ if (osdmap.containing_subtree_is_down(g_ceph_context, o, type, &down_cache)) {
+ dout(10) << "tick entire containing " << g_conf->mon_osd_down_out_subtree_limit
+ << " subtree for osd." << o << " is down; resetting timer" << dendl;
+ // reset timer, too.
+ down_pending_out[o] = now;
+ continue;
+ }
+ }
+ }
+
if (g_conf->mon_osd_down_out_interval > 0 &&
down.sec() >= grace) {
dout(10) << "tick marking osd." << o << " OUT after " << down
@@ -2780,44 +2795,22 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
return true;
} else if (m->cmd[2] == "delete" && m->cmd.size() >= 4) {
- // osd pool delete <poolname> <poolname again> <nonce>
- // hey, let's delete a pool!
+ // osd pool delete <poolname> <poolname again> --yes-i-really-really-mean-it
int64_t pool = osdmap.lookup_pg_pool_name(m->cmd[3].c_str());
if (pool < 0) {
ss << "pool '" << m->cmd[3] << "' does not exist";
err = 0;
goto out;
}
- if (m->cmd.size() < 6) {
- delete_pool_nonce = rand();
- delete_pool_nonce_timeout = ceph_clock_now(g_ceph_context);
- delete_pool_nonce_timeout += 30;
- ss << "WARNING: this will efficiently **DESTROY** an entire pool of data. if you are ABSOLUTELY CERTAIN"
- << " that this is what you want to do, retry listing the pool name twice, followed by " << delete_pool_nonce
- << " within 30 seconds.";
- err = -EPERM;
- goto out;
- }
- assert(m->cmd.size() >= 6);
- if (m->cmd[4] != m->cmd[3]) {
- ss << "ERROR: you must list the pool name you want to **DESTROY** twice";
- err = -EPERM;
- goto out;
- }
- unsigned safety = atol(m->cmd[5].c_str());
- if (safety != delete_pool_nonce) {
- ss << "ERROR: did not confirm pool deletion with correct confirmation; " << safety << " != " << delete_pool_nonce << "; try again";
- err = -EPERM;
- goto out;
- }
- if (ceph_clock_now(g_ceph_context) > delete_pool_nonce_timeout) {
- ss << "ERROR: did not confirm pool deletion within 30 seconds; try again";
+ if (m->cmd.size() != 6 ||
+ m->cmd[3] != m->cmd[4] ||
+ m->cmd[5] != "--yes-i-really-really-mean-it") {
+ ss << "WARNING: this will *PERMANENTLY DESTROY* all data stored in pool " << m->cmd[3]
+ << ". If you are *ABSOLUTELY CERTAIN* that is what you want, pass the pool name *twice*, "
+ << "followed by --yes-i-really-really-mean-it.";
err = -EPERM;
goto out;
}
- assert(safety == delete_pool_nonce);
- delete_pool_nonce = 0;
- delete_pool_nonce_timeout = utime_t();
int ret = _prepare_remove_pool(pool);
if (ret == 0)
ss << "pool '" << m->cmd[3] << "' deleted";
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index e389c65e825..9529f731c84 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -123,9 +123,6 @@ private:
map<int,double> osd_weight;
- unsigned delete_pool_nonce; // safety interlock for removing pools
- utime_t delete_pool_nonce_timeout;
-
void check_failures(utime_t now);
bool check_failure(utime_t now, int target_osd, failure_info_t& fi);
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 439ff06505a..c7d044ac6fd 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -172,6 +172,68 @@ int OSDMap::Incremental::identify_osd(uuid_d u) const
return -1;
}
+bool OSDMap::subtree_is_down(int id, set<int> *down_cache) const
+{
+ if (id >= 0)
+ return is_down(id);
+
+ if (down_cache &&
+ down_cache->count(id)) {
+ return true;
+ }
+
+ list<int> children;
+ crush->get_children(id, &children);
+ for (list<int>::iterator p = children.begin(); p != children.end(); ++p) {
+ if (!subtree_is_down(*p, down_cache)) {
+ return false;
+ }
+ }
+ if (down_cache) {
+ down_cache->insert(id);
+ }
+ return true;
+}
+
+bool OSDMap::containing_subtree_is_down(CephContext *cct, int id, int subtree_type, set<int> *down_cache) const
+{
+ // use a stack-local down_cache if we didn't get one from the
+ // caller. then at least this particular call will avoid duplicated
+ // work.
+ set<int> local_down_cache;
+ if (!down_cache) {
+ down_cache = &local_down_cache;
+ }
+
+ if (!subtree_is_down(id, down_cache)) {
+ ldout(cct, 30) << "containing_subtree_is_down(" << id << ") = false" << dendl;
+ return false;
+ }
+
+ int current = id;
+ while (true) {
+ // invariant: current subtree is known to be down.
+ int type;
+ if (current >= 0) {
+ type = 0;
+ } else {
+ type = crush->get_bucket_type(current);
+ }
+ assert(type >= 0);
+
+ // is this a big enough subtree to be done?
+ if (type >= subtree_type) {
+ ldout(cct, 30) << "containing_subtree_is_down(" << id << ") = true ... " << type << " >= " << subtree_type << dendl;
+ return true;
+ }
+
+ int r = crush->get_immediate_parent_id(current, &current);
+ if (r < 0) {
+ return false;
+ }
+ }
+}
+
void OSDMap::Incremental::encode_client_old(bufferlist& bl) const
{
__u16 v = 5;
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 5105fc7ab0e..f3f84f0b470 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -316,6 +316,12 @@ private:
bool is_in(int osd) const {
return exists(osd) && !is_out(osd);
}
+
+ /**
+ * check if an entire crush subtree is down
+ */
+ bool subtree_is_down(int id, set<int> *down_cache) const;
+ bool containing_subtree_is_down(CephContext *cct, int osd, int subtree_type, set<int> *down_cache) const;
int identify_osd(const entity_addr_t& addr) const;
int identify_osd(const uuid_d& u) const;
diff --git a/src/rados.cc b/src/rados.cc
index 0b012c5972b..a850f874ac2 100644
--- a/src/rados.cc
+++ b/src/rados.cc
@@ -58,7 +58,8 @@ void usage(ostream& out)
" mkpool <pool-name> [123[ 4]] create pool <pool-name>'\n"
" [with auid 123[and using crush rule 4]]\n"
" cppool <pool-name> <dest-pool> copy content of a pool\n"
-" rmpool <pool-name> remove pool <pool-name>'\n"
+" rmpool <pool-name> [<pool-name> --yes-i-really-really-mean-it]\n"
+" remove pool <pool-name>'\n"
" df show per-pool and total usage\n"
" ls list objects in pool\n\n"
" chown 123 change the pool owner to auid 123\n"
@@ -1799,7 +1800,7 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts,
strcmp(nargs[1], nargs[2]) != 0 ||
strcmp(nargs[3], "--yes-i-really-really-mean-it") != 0) {
cerr << "WARNING:\n"
- << " This will PERMANENTLY DESTROY an entire pool of object with no way back.\n"
+ << " This will PERMANENTLY DESTROY an entire pool of objects with no way back.\n"
<< " To confirm, pass the pool to remove twice, followed by\n"
<< " --yes-i-really-really-mean-it" << std::endl;
cout << nargs << std::endl;
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index b2925940f77..dfa6827c7ff 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -707,7 +707,7 @@ int RGWPostObj_ObjStore_S3::get_params()
string whitespaces (" \t\f\v\n\r");
// get the part boundary
- string req_content_type_str = s->env->get("CONTENT_TYPE");
+ string req_content_type_str = s->env->get("CONTENT_TYPE", "");
string req_content_type;
map<string, string> params;
diff --git a/src/test/cli/ceph/help.t b/src/test/cli/ceph/help.t
index e8afdaf7d35..186490aebd4 100644
--- a/src/test/cli/ceph/help.t
+++ b/src/test/cli/ceph/help.t
@@ -56,7 +56,7 @@
ceph osd pool mksnap <pool> <snapname>
ceph osd pool rmsnap <pool> <snapname>
ceph osd pool create <pool> <pg_num> [<pgp_num>]
- ceph osd pool delete <pool>
+ ceph osd pool delete <pool> [<pool> --yes-i-really-really-mean-it]
ceph osd pool rename <pool> <new pool name>
ceph osd pool set <pool> <field> <value>
ceph osd scrub <osd-id>
diff --git a/src/tools/ceph.cc b/src/tools/ceph.cc
index 7582ac96ab2..c99d8624978 100644
--- a/src/tools/ceph.cc
+++ b/src/tools/ceph.cc
@@ -99,7 +99,7 @@ static void usage()
cout << " ceph osd pool mksnap <pool> <snapname>\n";
cout << " ceph osd pool rmsnap <pool> <snapname>\n";
cout << " ceph osd pool create <pool> <pg_num> [<pgp_num>]\n";
- cout << " ceph osd pool delete <pool>\n";
+ cout << " ceph osd pool delete <pool> [<pool> --yes-i-really-really-mean-it]\n";
cout << " ceph osd pool rename <pool> <new pool name>\n";
cout << " ceph osd pool set <pool> <field> <value>\n";
cout << " ceph osd scrub <osd-id>\n";