summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2012-11-25 09:18:44 -0800
committerSage Weil <sage@inktank.com>2012-11-25 09:18:44 -0800
commit39d2d08a233c81dc6e376f7016ff92f1fb33fa70 (patch)
treef87edb3f97cb154ed22b256f0f90a2d0fd774584
parent2b002a8de3d64a5c03a4710bb44923afd2feed36 (diff)
parent7602a055764aff1c50b1e2641a3e703845cbe471 (diff)
downloadceph-39d2d08a233c81dc6e376f7016ff92f1fb33fa70.tar.gz
Merge remote-tracking branch 'gh/next'
-rw-r--r--doc/man/8/mkcephfs.rst19
-rw-r--r--man/mkcephfs.815
-rw-r--r--qa/run_xfstests.sh2
-rw-r--r--src/Makefile.am2
-rw-r--r--src/auth/Crypto.cc6
-rw-r--r--src/auth/KeyRing.cc3
-rw-r--r--src/auth/cephx/CephxKeyServer.cc14
-rw-r--r--src/auth/cephx/CephxProtocol.cc2
-rw-r--r--src/ceph.conf.twoosds7
-rw-r--r--src/common/OutputDataSocket.cc418
-rw-r--r--src/common/OutputDataSocket.h72
-rw-r--r--src/common/config_opts.h3
-rw-r--r--src/common/sync_filesystem.h6
-rw-r--r--src/crush/CrushWrapper.cc33
-rw-r--r--src/crush/CrushWrapper.h9
-rw-r--r--src/init-ceph.in78
-rw-r--r--src/mds/Server.cc2
-rw-r--r--src/mkcephfs.in82
-rw-r--r--src/mon/AuthMonitor.cc6
-rw-r--r--src/mon/Monitor.cc20
-rw-r--r--src/mon/PGMonitor.cc17
-rw-r--r--src/msg/Accepter.cc4
-rw-r--r--src/os/FileStore.cc10
-rw-r--r--src/osdc/ObjectCacher.cc1
-rw-r--r--src/osdc/Striper.cc5
-rw-r--r--src/rbd.cc6
-rw-r--r--src/rgw/rgw_admin.cc21
-rw-r--r--src/rgw/rgw_client_io.cc3
-rw-r--r--src/rgw/rgw_client_io.h5
-rw-r--r--src/rgw/rgw_common.cc2
-rw-r--r--src/rgw/rgw_common.h2
-rw-r--r--src/rgw/rgw_gc.cc4
-rw-r--r--src/rgw/rgw_gc.h4
-rw-r--r--src/rgw/rgw_log.cc105
-rw-r--r--src/rgw/rgw_log.h20
-rw-r--r--src/rgw/rgw_main.cc76
-rw-r--r--src/rgw/rgw_op.cc23
-rw-r--r--src/rgw/rgw_op.h6
-rw-r--r--src/rgw/rgw_rados.cc5
-rw-r--r--src/rgw/rgw_rados.h7
-rw-r--r--src/rgw/rgw_tools.cc5
-rw-r--r--src/rgw/rgw_tools.h1
-rw-r--r--src/rgw/rgw_xml.cc9
-rw-r--r--src/sample.ceph.conf24
-rw-r--r--src/test/cli/osdmaptool/ceph.conf.withracks4
-rw-r--r--src/test/librbd/fsx.c174
46 files changed, 1096 insertions, 246 deletions
diff --git a/doc/man/8/mkcephfs.rst b/doc/man/8/mkcephfs.rst
index 5d0c9952356..05dee95e746 100644
--- a/doc/man/8/mkcephfs.rst
+++ b/doc/man/8/mkcephfs.rst
@@ -7,7 +7,7 @@
Synopsis
========
-| **mkcephfs** -c *ceph.conf* [ --mkbtrfs ] [ -a, --all-hosts [ -k
+| **mkcephfs** -c *ceph.conf* [ --mkfs ] [ -a, --all-hosts [ -k
*/path/to/admin.keyring* ] ]
@@ -70,20 +70,15 @@ Options
default is ``/etc/ceph/keyring`` (or whatever is specified in the
config file).
-.. option:: --mkbtrfs
+.. option:: --mkfs
- Create and mount the any btrfs file systems specified in the
- ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
- and (if it differs from "osd data") "btrfs path" options must be
- defined.
+ Create and mount the any file systems specified in the
+ ceph.conf for OSD data storage using mkfs. The "devs" and (if it
+ differs from "osd data") "fs path" options must be defined.
**NOTE** Btrfs is still considered experimental. This option
- can ease some configuration pain, but is the use of btrfs is not
- required when ``osd data`` directories are mounted manually by the
- adminstrator.
-
- **NOTE** This option is deprecated and will be removed in a future
- release.
+ can ease some configuration pain, but is not required when
+ ``osd data`` directories are mounted manually by the adminstrator.
.. option:: --no-copy-conf
diff --git a/man/mkcephfs.8 b/man/mkcephfs.8
index f0ab80d2ccc..26276b76030 100644
--- a/man/mkcephfs.8
+++ b/man/mkcephfs.8
@@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
.
.SH SYNOPSIS
.nf
-\fBmkcephfs\fP \-c \fIceph.conf\fP [ \-\-mkbtrfs ] [ \-a, \-\-all\-hosts [ \-k
+\fBmkcephfs\fP \-c \fIceph.conf\fP [ \-\-mkfs ] [ \-a, \-\-all\-hosts [ \-k
\fI/path/to/admin.keyring\fP ] ]
.fi
.sp
@@ -111,19 +111,16 @@ config file).
.UNINDENT
.INDENT 0.0
.TP
-.B \-\-mkbtrfs
-Create and mount the any btrfs file systems specified in the
-ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
-and (if it differs from "osd data") "btrfs path" options must be
+.B \-\-mkfs
+Create and mount any file systems specified in the
+ceph.conf for OSD data storage using mkfs.*. The "devs"
+and (if it differs from "osd data") "fs path" options must be
defined.
.sp
\fBNOTE\fP Btrfs is still considered experimental. This option
-can ease some configuration pain, but is the use of btrfs is not
+can ease some configuration pain, but the use of this option is not
required when \fBosd data\fP directories are mounted manually by the
adminstrator.
-.sp
-\fBNOTE\fP This option is deprecated and will be removed in a future
-release.
.UNINDENT
.INDENT 0.0
.TP
diff --git a/qa/run_xfstests.sh b/qa/run_xfstests.sh
index 958691cee09..1eba38a248d 100644
--- a/qa/run_xfstests.sh
+++ b/qa/run_xfstests.sh
@@ -49,7 +49,7 @@ XFS_MKFS_OPTIONS="-l su=32k"
# until we can work through getting them all passing reliably.
TESTS="1-9 11-15 17 19-21 26-29 31-34 41 46-48 50-54 56 61 63-67 69-70 74-76"
TESTS="${TESTS} 78 79 84-89 91-92 100 103 105 108 110 116-121 124 126"
-TESTS="${TESTS} 129-135 137-141 164-167 174 179 181-184 186-190 192 194"
+TESTS="${TESTS} 129-135 137-141 164-167 179 182-184 186-190 192 194"
TESTS="${TESTS} 196 199 201 203 214-216 220-227 234 236-238 241 243-249"
TESTS="${TESTS} 253 257-259 261 262 269 273 275 277 278 280 285 286"
# 275 was the highest available test as of 4/10/12.
diff --git a/src/Makefile.am b/src/Makefile.am
index 3a4437c0e49..eebb6c7cc5c 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1197,6 +1197,7 @@ libcommon_files = \
common/BackTrace.cc \
common/perf_counters.cc \
common/Mutex.cc \
+ common/OutputDataSocket.cc \
common/admin_socket.cc \
common/admin_socket_client.cc \
common/escape.c \
@@ -1475,6 +1476,7 @@ noinst_HEADERS = \
common/Finisher.h\
common/Formatter.h\
common/perf_counters.h\
+ common/OutputDataSocket.h \
common/admin_socket.h \
common/admin_socket_client.h \
common/shared_cache.hpp \
diff --git a/src/auth/Crypto.cc b/src/auth/Crypto.cc
index 77c959fba92..1bfc2814f4f 100644
--- a/src/auth/Crypto.cc
+++ b/src/auth/Crypto.cc
@@ -191,9 +191,13 @@ static void nss_aes_operation(CK_ATTRIBUTE_TYPE op, const bufferptr& secret,
goto err_op;
}
- PK11_DestroyContext(ctx, PR_TRUE);
out_tmp.set_length(written + written2);
out.append(out_tmp);
+
+ PK11_DestroyContext(ctx, PR_TRUE);
+ SECITEM_FreeItem(param, PR_TRUE);
+ PK11_FreeSymKey(key);
+ PK11_FreeSlot(slot);
return;
err_op:
diff --git a/src/auth/KeyRing.cc b/src/auth/KeyRing.cc
index ad23922d625..96dba74a043 100644
--- a/src/auth/KeyRing.cc
+++ b/src/auth/KeyRing.cc
@@ -236,7 +236,8 @@ void KeyRing::import(CephContext *cct, KeyRing& other)
for (map<EntityName, EntityAuth>::iterator p = other.keys.begin();
p != other.keys.end();
++p) {
- ldout(cct, 10) << " importing " << p->first << " " << p->second << dendl;
+ ldout(cct, 10) << " importing " << p->first << dendl;
+ ldout(cct, 30) << " " << p->second << dendl;
keys[p->first] = p->second;
}
}
diff --git a/src/auth/cephx/CephxKeyServer.cc b/src/auth/cephx/CephxKeyServer.cc
index 1440b2c2b9f..c3e4f9cfdc1 100644
--- a/src/auth/cephx/CephxKeyServer.cc
+++ b/src/auth/cephx/CephxKeyServer.cc
@@ -46,7 +46,7 @@ bool KeyServerData::get_service_secret(CephContext *cct, uint32_t service_id,
secret_id = riter->first;
secret = riter->second;
- ldout(cct, 10) << "get_service_secret service " << ceph_entity_type_name(service_id)
+ ldout(cct, 30) << "get_service_secret service " << ceph_entity_type_name(service_id)
<< " id " << secret_id << " " << secret << dendl;
return true;
}
@@ -77,12 +77,13 @@ bool KeyServerData::get_service_secret(CephContext *cct, uint32_t service_id,
if (riter == secrets.secrets.end()) {
ldout(cct, 10) << "get_service_secret service " << ceph_entity_type_name(service_id)
- << " secret " << secret_id << " not found; i have:" << dendl;
+ << " secret " << secret_id << " not found" << dendl;
+ ldout(cct, 30) << " I have:" << dendl;
for (map<uint64_t, ExpiringCryptoKey>::const_iterator iter =
secrets.secrets.begin();
iter != secrets.secrets.end();
++iter)
- ldout(cct, 10) << " id " << iter->first << " " << iter->second << dendl;
+ ldout(cct, 30) << " id " << iter->first << " " << iter->second << dendl;
return false;
}
@@ -170,7 +171,7 @@ bool KeyServer::_check_rotating_secrets()
void KeyServer::_dump_rotating_secrets()
{
- ldout(cct, 10) << "_dump_rotating_secrets" << dendl;
+ ldout(cct, 30) << "_dump_rotating_secrets" << dendl;
for (map<uint32_t, RotatingSecrets>::iterator iter = data.rotating_secrets.begin();
iter != data.rotating_secrets.end();
++iter) {
@@ -178,7 +179,7 @@ void KeyServer::_dump_rotating_secrets()
for (map<uint64_t, ExpiringCryptoKey>::iterator mapiter = key.secrets.begin();
mapiter != key.secrets.end();
++mapiter)
- ldout(cct, 10) << "service " << ceph_entity_type_name(iter->first)
+ ldout(cct, 30) << "service " << ceph_entity_type_name(iter->first)
<< " id " << mapiter->first
<< " key " << mapiter->second << dendl;
}
@@ -203,7 +204,8 @@ int KeyServer::_rotate_secret(uint32_t service_id)
}
ek.expiration += ttl;
uint64_t secret_id = r.add(ek);
- ldout(cct, 10) << "_rotate_secret adding " << ceph_entity_type_name(service_id)
+ ldout(cct, 10) << "_rotate_secret adding " << ceph_entity_type_name(service_id) << dendl;
+ ldout(cct, 30) << "_rotate_secret adding " << ceph_entity_type_name(service_id)
<< " id " << secret_id << " " << ek
<< dendl;
added++;
diff --git a/src/auth/cephx/CephxProtocol.cc b/src/auth/cephx/CephxProtocol.cc
index 9c262634e7b..34f31f70c72 100644
--- a/src/auth/cephx/CephxProtocol.cc
+++ b/src/auth/cephx/CephxProtocol.cc
@@ -118,7 +118,7 @@ bool cephx_build_service_ticket_reply(CephContext *cct,
}
::encode(blob, service_ticket_bl);
- ldout(cct, 20) << "service_ticket_blob is ";
+ ldout(cct, 30) << "service_ticket_blob is ";
service_ticket_bl.hexdump(*_dout);
*_dout << dendl;
diff --git a/src/ceph.conf.twoosds b/src/ceph.conf.twoosds
index c0cfc68f1a0..3417cf68fce 100644
--- a/src/ceph.conf.twoosds
+++ b/src/ceph.conf.twoosds
@@ -67,16 +67,17 @@
debug journal = 20
log dir = /data/cosd$id
osd data = /mnt/osd$id
- btrfs options = "flushoncommit,usertrans"
; user = root
; osd journal = /mnt/osd$id/journal
; osd journal size = 1000
osd journal = "/dev/disk/by-path/pci-0000:05:02.0-scsi-6:0:0:0"
+ osd mkfs type = btrfs
+ osd mount options btrfs = "flushoncommit,usertrans"
; filestore max sync interval = 1
- btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
-; btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
+ devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
+; devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
; /dev/disk/by-path/pci-0000:05:01.0-scsi-3:0:0:0 \
; /dev/disk/by-path/pci-0000:05:01.0-scsi-4:0:0:0 \
; /dev/disk/by-path/pci-0000:05:01.0-scsi-5:0:0:0 \
diff --git a/src/common/OutputDataSocket.cc b/src/common/OutputDataSocket.cc
new file mode 100644
index 00000000000..54f6ab4a2a4
--- /dev/null
+++ b/src/common/OutputDataSocket.cc
@@ -0,0 +1,418 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/Thread.h"
+#include "common/OutputDataSocket.h"
+#include "common/config.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/perf_counters.h"
+#include "common/pipe.h"
+#include "common/safe_io.h"
+#include "common/version.h"
+#include "common/Formatter.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <map>
+#include <poll.h>
+#include <set>
+#include <sstream>
+#include <stdint.h>
+#include <string.h>
+#include <string>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "include/compat.h"
+
+#define dout_subsys ceph_subsys_asok
+#undef dout_prefix
+#define dout_prefix *_dout << "asok(" << (void*)m_cct << ") "
+
+using std::ostringstream;
+
+/*
+ * UNIX domain sockets created by an application persist even after that
+ * application closes, unless they're explicitly unlinked. This is because the
+ * directory containing the socket keeps a reference to the socket.
+ *
+ * This code makes things a little nicer by unlinking those dead sockets when
+ * the application exits normally.
+ */
+static pthread_mutex_t cleanup_lock = PTHREAD_MUTEX_INITIALIZER;
+static std::vector <const char*> cleanup_files;
+static bool cleanup_atexit = false;
+
+static void remove_cleanup_file(const char *file)
+{
+ pthread_mutex_lock(&cleanup_lock);
+ TEMP_FAILURE_RETRY(unlink(file));
+ for (std::vector <const char*>::iterator i = cleanup_files.begin();
+ i != cleanup_files.end(); ++i) {
+ if (strcmp(file, *i) == 0) {
+ free((void*)*i);
+ cleanup_files.erase(i);
+ break;
+ }
+ }
+ pthread_mutex_unlock(&cleanup_lock);
+}
+
+static void remove_all_cleanup_files()
+{
+ pthread_mutex_lock(&cleanup_lock);
+ for (std::vector <const char*>::iterator i = cleanup_files.begin();
+ i != cleanup_files.end(); ++i) {
+ TEMP_FAILURE_RETRY(unlink(*i));
+ free((void*)*i);
+ }
+ cleanup_files.clear();
+ pthread_mutex_unlock(&cleanup_lock);
+}
+
+static void add_cleanup_file(const char *file)
+{
+ char *fname = strdup(file);
+ if (!fname)
+ return;
+ pthread_mutex_lock(&cleanup_lock);
+ cleanup_files.push_back(fname);
+ if (!cleanup_atexit) {
+ atexit(remove_all_cleanup_files);
+ cleanup_atexit = true;
+ }
+ pthread_mutex_unlock(&cleanup_lock);
+}
+
+
+OutputDataSocket::OutputDataSocket(CephContext *cct, uint64_t _backlog)
+ : m_cct(cct),
+ data_max_backlog(_backlog),
+ m_sock_fd(-1),
+ m_shutdown_rd_fd(-1),
+ m_shutdown_wr_fd(-1),
+ going_down(false),
+ m_lock("OutputDataSocket::m_lock")
+{
+}
+
+OutputDataSocket::~OutputDataSocket()
+{
+ shutdown();
+}
+
+/*
+ * This thread listens on the UNIX domain socket for incoming connections.
+ * It only handles one connection at a time at the moment. All I/O is nonblocking,
+ * so that we can implement sensible timeouts. [TODO: make all I/O nonblocking]
+ *
+ * This thread also listens to m_shutdown_rd_fd. If there is any data sent to this
+ * pipe, the thread terminates itself gracefully, allowing the
+ * OutputDataSocketConfigObs class to join() it.
+ */
+
+#define PFL_SUCCESS ((void*)(intptr_t)0)
+#define PFL_FAIL ((void*)(intptr_t)1)
+
+std::string OutputDataSocket::create_shutdown_pipe(int *pipe_rd, int *pipe_wr)
+{
+ int pipefd[2];
+ int ret = pipe_cloexec(pipefd);
+ if (ret < 0) {
+ ostringstream oss;
+ oss << "OutputDataSocket::create_shutdown_pipe error: " << cpp_strerror(ret);
+ return oss.str();
+ }
+
+ *pipe_rd = pipefd[0];
+ *pipe_wr = pipefd[1];
+ return "";
+}
+
+std::string OutputDataSocket::bind_and_listen(const std::string &sock_path, int *fd)
+{
+ ldout(m_cct, 5) << "bind_and_listen " << sock_path << dendl;
+
+ struct sockaddr_un address;
+ if (sock_path.size() > sizeof(address.sun_path) - 1) {
+ ostringstream oss;
+ oss << "OutputDataSocket::bind_and_listen: "
+ << "The UNIX domain socket path " << sock_path << " is too long! The "
+ << "maximum length on this system is "
+ << (sizeof(address.sun_path) - 1);
+ return oss.str();
+ }
+ int sock_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (sock_fd < 0) {
+ int err = errno;
+ ostringstream oss;
+ oss << "OutputDataSocket::bind_and_listen: "
+ << "failed to create socket: " << cpp_strerror(err);
+ return oss.str();
+ }
+ int r = fcntl(sock_fd, F_SETFD, FD_CLOEXEC);
+ if (r < 0) {
+ r = errno;
+ TEMP_FAILURE_RETRY(::close(sock_fd));
+ ostringstream oss;
+ oss << "OutputDataSocket::bind_and_listen: failed to fcntl on socket: " << cpp_strerror(r);
+ return oss.str();
+ }
+ memset(&address, 0, sizeof(struct sockaddr_un));
+ address.sun_family = AF_UNIX;
+ snprintf(address.sun_path, sizeof(address.sun_path),
+ "%s", sock_path.c_str());
+ if (bind(sock_fd, (struct sockaddr*)&address,
+ sizeof(struct sockaddr_un)) != 0) {
+ int err = errno;
+ if (err == EADDRINUSE) {
+ // The old UNIX domain socket must still be there.
+ // Let's unlink it and try again.
+ TEMP_FAILURE_RETRY(unlink(sock_path.c_str()));
+ if (bind(sock_fd, (struct sockaddr*)&address,
+ sizeof(struct sockaddr_un)) == 0) {
+ err = 0;
+ }
+ else {
+ err = errno;
+ }
+ }
+ if (err != 0) {
+ ostringstream oss;
+ oss << "OutputDataSocket::bind_and_listen: "
+ << "failed to bind the UNIX domain socket to '" << sock_path
+ << "': " << cpp_strerror(err);
+ close(sock_fd);
+ return oss.str();
+ }
+ }
+ if (listen(sock_fd, 5) != 0) {
+ int err = errno;
+ ostringstream oss;
+ oss << "OutputDataSocket::bind_and_listen: "
+ << "failed to listen to socket: " << cpp_strerror(err);
+ close(sock_fd);
+ TEMP_FAILURE_RETRY(unlink(sock_path.c_str()));
+ return oss.str();
+ }
+ *fd = sock_fd;
+ return "";
+}
+
+void* OutputDataSocket::entry()
+{
+ ldout(m_cct, 5) << "entry start" << dendl;
+ while (true) {
+ struct pollfd fds[2];
+ memset(fds, 0, sizeof(fds));
+ fds[0].fd = m_sock_fd;
+ fds[0].events = POLLIN | POLLRDBAND;
+ fds[1].fd = m_shutdown_rd_fd;
+ fds[1].events = POLLIN | POLLRDBAND;
+
+ int ret = poll(fds, 2, -1);
+ if (ret < 0) {
+ int err = errno;
+ if (err == EINTR) {
+ continue;
+ }
+ lderr(m_cct) << "OutputDataSocket: poll(2) error: '"
+ << cpp_strerror(err) << dendl;
+ return PFL_FAIL;
+ }
+
+ if (fds[0].revents & POLLIN) {
+ // Send out some data
+ do_accept();
+ }
+ if (fds[1].revents & POLLIN) {
+ // Parent wants us to shut down
+ return PFL_SUCCESS;
+ }
+ }
+ ldout(m_cct, 5) << "entry exit" << dendl;
+
+ return PFL_SUCCESS; // unreachable
+}
+
+
+bool OutputDataSocket::do_accept()
+{
+ struct sockaddr_un address;
+ socklen_t address_length = sizeof(address);
+ ldout(m_cct, 30) << "OutputDataSocket: calling accept" << dendl;
+ int connection_fd = accept(m_sock_fd, (struct sockaddr*) &address,
+ &address_length);
+ ldout(m_cct, 30) << "OutputDataSocket: finished accept" << dendl;
+ if (connection_fd < 0) {
+ int err = errno;
+ lderr(m_cct) << "OutputDataSocket: do_accept error: '"
+ << cpp_strerror(err) << dendl;
+ return false;
+ }
+
+ handle_connection(connection_fd);
+ close_connection(connection_fd);
+
+ return 0;
+}
+
+void OutputDataSocket::handle_connection(int fd)
+{
+ bufferlist bl;
+
+ m_lock.Lock();
+ init_connection(bl);
+ m_lock.Unlock();
+
+ if (bl.length()) {
+ /* need to special case the connection init buffer output, as it needs
+ * to be dumped before any data, including older data that was sent
+ * before the connection was established, or before we identified
+ * older connection was broken
+ */
+ int ret = safe_write(fd, bl.c_str(), bl.length());
+ if (ret < 0) {
+ return;
+ }
+ }
+
+ int ret = dump_data(fd);
+ if (ret < 0)
+ return;
+
+ do {
+ m_lock.Lock();
+ cond.Wait(m_lock);
+
+ if (going_down) {
+ m_lock.Unlock();
+ break;
+ }
+ m_lock.Unlock();
+
+ ret = dump_data(fd);
+ } while (ret >= 0);
+}
+
+int OutputDataSocket::dump_data(int fd)
+{
+ m_lock.Lock();
+ list<bufferlist> l;
+ l = data;
+ data.clear();
+ data_size = 0;
+ m_lock.Unlock();
+
+ for (list<bufferlist>::iterator iter = l.begin(); iter != l.end(); ++iter) {
+ bufferlist& bl = *iter;
+ int ret = safe_write(fd, bl.c_str(), bl.length());
+ if (ret >= 0) {
+ ret = safe_write(fd, delim.c_str(), delim.length());
+ }
+ if (ret < 0) {
+ for (; iter != l.end(); ++iter) {
+ bufferlist& bl = *iter;
+ data.push_back(bl);
+ data_size += bl.length();
+ }
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+void OutputDataSocket::close_connection(int fd)
+{
+ TEMP_FAILURE_RETRY(close(fd));
+}
+
+bool OutputDataSocket::init(const std::string &path)
+{
+ ldout(m_cct, 5) << "init " << path << dendl;
+
+ /* Set up things for the new thread */
+ std::string err;
+ int pipe_rd = -1, pipe_wr = -1;
+ err = create_shutdown_pipe(&pipe_rd, &pipe_wr);
+ if (!err.empty()) {
+ lderr(m_cct) << "OutputDataSocketConfigObs::init: error: " << err << dendl;
+ return false;
+ }
+ int sock_fd;
+ err = bind_and_listen(path, &sock_fd);
+ if (!err.empty()) {
+ lderr(m_cct) << "OutputDataSocketConfigObs::init: failed: " << err << dendl;
+ close(pipe_rd);
+ close(pipe_wr);
+ return false;
+ }
+
+ /* Create new thread */
+ m_sock_fd = sock_fd;
+ m_shutdown_rd_fd = pipe_rd;
+ m_shutdown_wr_fd = pipe_wr;
+ m_path = path;
+ create();
+ add_cleanup_file(m_path.c_str());
+ return true;
+}
+
+void OutputDataSocket::shutdown()
+{
+ m_lock.Lock();
+ going_down = true;
+ cond.Signal();
+ m_lock.Unlock();
+
+ if (m_shutdown_wr_fd < 0)
+ return;
+
+ ldout(m_cct, 5) << "shutdown" << dendl;
+
+ // Send a byte to the shutdown pipe that the thread is listening to
+ char buf[1] = { 0x0 };
+ int ret = safe_write(m_shutdown_wr_fd, buf, sizeof(buf));
+ TEMP_FAILURE_RETRY(close(m_shutdown_wr_fd));
+ m_shutdown_wr_fd = -1;
+
+ if (ret == 0) {
+ join();
+ } else {
+ lderr(m_cct) << "OutputDataSocket::shutdown: failed to write "
+ "to thread shutdown pipe: error " << ret << dendl;
+ }
+
+ remove_cleanup_file(m_path.c_str());
+ m_path.clear();
+}
+
+void OutputDataSocket::append_output(bufferlist& bl)
+{
+ Mutex::Locker l(m_lock);
+
+ if (data_size + bl.length() > data_max_backlog) {
+ ldout(m_cct, 20) << "dropping data output, max backlog reached" << dendl;
+ }
+ data.push_back(bl);
+
+ data_size += bl.length();
+
+ cond.Signal();
+}
diff --git a/src/common/OutputDataSocket.h b/src/common/OutputDataSocket.h
new file mode 100644
index 00000000000..f581a56bf03
--- /dev/null
+++ b/src/common/OutputDataSocket.h
@@ -0,0 +1,72 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_COMMON_OUTPUTDATASOCKET_H
+#define CEPH_COMMON_OUTPUTDATASOCKET_H
+
+#include "common/Thread.h"
+#include "common/Mutex.h"
+#include "common/Cond.h"
+
+#include <string>
+#include <map>
+#include <list>
+#include "include/buffer.h"
+
+class CephContext;
+
+class OutputDataSocket : public Thread
+{
+public:
+ OutputDataSocket(CephContext *cct, uint64_t _backlog);
+ virtual ~OutputDataSocket();
+
+ bool init(const std::string &path);
+
+ void append_output(bufferlist& bl);
+
+protected:
+ virtual void init_connection(bufferlist& bl) {}
+ void shutdown();
+
+ std::string create_shutdown_pipe(int *pipe_rd, int *pipe_wr);
+ std::string bind_and_listen(const std::string &sock_path, int *fd);
+
+ void *entry();
+ bool do_accept();
+
+ void handle_connection(int fd);
+ void close_connection(int fd);
+
+ int dump_data(int fd);
+
+ CephContext *m_cct;
+ uint64_t data_max_backlog;
+ std::string m_path;
+ int m_sock_fd;
+ int m_shutdown_rd_fd;
+ int m_shutdown_wr_fd;
+ bool going_down;
+
+ uint64_t data_size;
+
+ std::list<bufferlist> data;
+
+ Mutex m_lock;
+ Cond cond;
+
+ bufferlist delim;
+};
+
+#endif
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index cc05095bb3c..51f569c7e1f 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -470,6 +470,9 @@ OPTION(rgw_usage_max_shards, OPT_INT, 32)
OPTION(rgw_usage_max_user_shards, OPT_INT, 1)
OPTION(rgw_enable_ops_log, OPT_BOOL, true) // enable logging every rgw operation
OPTION(rgw_enable_usage_log, OPT_BOOL, true) // enable logging bandwidth usage
+OPTION(rgw_ops_log_rados, OPT_BOOL, true) // whether ops log should go to rados
+OPTION(rgw_ops_log_socket_path, OPT_STR, "") // path to unix domain socket where ops log can go
+OPTION(rgw_ops_log_data_backlog, OPT_INT, 5 << 20) // max data backlog for ops log
OPTION(rgw_usage_log_flush_threshold, OPT_INT, 1024) // threshold to flush pending log data
OPTION(rgw_usage_log_tick_interval, OPT_INT, 30) // flush pending log data every X seconds
OPTION(rgw_intent_log_object_name, OPT_STR, "%Y-%m-%d-%i-%n") // man date to see codes (a subset are supported)
diff --git a/src/common/sync_filesystem.h b/src/common/sync_filesystem.h
index 3ad8c9e928d..dc90b890c93 100644
--- a/src/common/sync_filesystem.h
+++ b/src/common/sync_filesystem.h
@@ -16,6 +16,7 @@
#define CEPH_SYNC_FILESYSTEM_H
#include <unistd.h>
+#include <syscall.h>
#ifndef __CYGWIN__
# ifndef DARWIN
@@ -35,6 +36,11 @@ inline int sync_filesystem(int fd)
return 0;
#endif
+#ifdef SYS_syncfs
+ if (syscall(SYS_syncfs, fd) == 0)
+ return 0;
+#endif
+
#ifdef BTRFS_IOC_SYNC
if (::ioctl(fd, BTRFS_IOC_SYNC) == 0)
return 0;
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 8d33839bbb7..f38a7698376 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -20,6 +20,25 @@ void CrushWrapper::find_roots(set<int>& roots) const
}
}
+bool CrushWrapper::subtree_contains(int root, int item) const
+{
+ if (root == item)
+ return true;
+
+ if (root >= 0)
+ return false; // root is a leaf
+
+ const crush_bucket *b = get_bucket(root);
+ if (!b)
+ return false;
+
+ for (unsigned j=0; j<b->size; j++) {
+ if (subtree_contains(b->items[j], item))
+ return true;
+ }
+ return false;
+}
+
int CrushWrapper::remove_item(CephContext *cct, int item)
{
@@ -230,9 +249,23 @@ int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string n
return -EINVAL;
}
+ // check that we aren't creating a cycle.
+ if (subtree_contains(id, cur)) {
+ ldout(cct, 1) << "insert_item item " << cur << " already exists beneath " << id << dendl;
+ return -EINVAL;
+ }
+
crush_bucket *b = get_bucket(id);
assert(b);
+ if (p->first != b->type) {
+ ldout(cct, 1) << "insert_item existing bucket has type "
+ << "'" << type_map[b->type] << "' != "
+ << "'" << type_map[p->first] << "'" << dendl;
+ return -EINVAL;
+ }
+
+
// make sure the item doesn't already exist in this bucket
for (unsigned j=0; j<b->size; j++)
if (b->items[j] == cur) {
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index e8120931103..5ea68b9a83d 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -212,6 +212,15 @@ public:
void find_roots(set<int>& roots) const;
/**
+ * see if an item is contained within a subtree
+ *
+ * @param root haystack
+ * @param item needle
+ * @return true if the item is located beneath the given node
+ */
+ bool subtree_contains(int root, int item) const;
+
+ /**
* see if item is located where we think it is
*
* This verifies that the given item is located at a particular
diff --git a/src/init-ceph.in b/src/init-ceph.in
index 31aeb287223..2bafa995f75 100644
--- a/src/init-ceph.in
+++ b/src/init-ceph.in
@@ -100,8 +100,8 @@ docrun=
allhosts=0
debug=0
monaddr=
-dobtrfs=1
-dobtrfsumount=0
+dofsmount=1
+dofsumount=0
verbose=0
while echo $1 | grep -q '^-'; do # FIXME: why not '^-'?
@@ -130,14 +130,14 @@ case $1 in
shift
MON_ADDR=$1
;;
- --btrfs)
- dobtrfs=1
+ --btrfs | --fsmount)
+ dofsmount=1
;;
- --nobtrfs)
- dobtrfs=0
+ --nobtrfs | --nofsmount)
+ dofsmount=0
;;
- --btrfsumount)
- dobtrfsumount=1
+ --btrfsumount | --fsumount)
+ dofsumount=1
;;
--conf | -c)
[ -z "$2" ] && usage_exit
@@ -222,9 +222,18 @@ for name in $what; do
if echo $name | grep -q ^osd; then
get_conf osd_data "" "osd data"
- get_conf btrfs_path "$osd_data" "btrfs path" # mount point defaults so osd data
- get_conf btrfs_devs "" "btrfs devs"
- first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
+ get_conf fs_path "$osd_data" "fs path" # mount point defaults so osd data
+ get_conf fs_devs "" "devs"
+ if [ -z "$fs_devs" ]; then
+ # try to fallback to old keys
+ get_conf tmp_btrfs_devs "" "btrfs devs"
+ if [ -n "$tmp_btrfs_devs" ]; then
+ fs_devs="$tmp_btrfs_devs"
+ else
+ echo No osd devs defined!
+ fi
+ fi
+ first_dev=`echo $fs_devs | cut '-d ' -f 1`
fi
# do lockfile, if RH
@@ -262,13 +271,44 @@ for name in $what; do
cmd="$wrap $cmd $runmode"
- if [ $dobtrfs -eq 1 ] && [ -n "$btrfs_devs" ]; then
+ if [ $dofsmount -eq 1 ] && [ -n "$fs_devs" ]; then
get_conf pre_mount "true" "pre mount command"
- get_conf btrfs_opt "noatime" "btrfs options"
- [ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
+ get_conf fs_type "" "osd mkfs type"
+
+ if [ -z "$fs_type" ]; then
+ # try to fallback to to old keys
+ get_conf tmp_devs "" "btrfs devs"
+ if [ -n "$tmp_devs" ]; then
+ fs_type = "btrfs"
+ else
+ echo No filesystem type defined!
+ exit 0
+ fi
+ fi
+
+ get_conf fs_opt "" "osd mount options $fs_type"
+ if [ -z "$fs_opt" ]; then
+ if [ "$fs_type" = "btrfs" ]; then
+ #try to fallback to old keys
+ get_conf fs_opt "" "btrfs options"
+ fi
+
+ if [ -z "$fs_opt" ]; then
+ #fallback to use at least noatime
+ fs_opt="rw,noatime"
+ fi
+ fi
+
+ [ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
[ -n "$pre_mount" ] && do_cmd "$pre_mount"
- echo Mounting Btrfs on $host:$btrfs_path
- do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $btrfs_path' /proc/mounts || mount -t btrfs $btrfs_opt $first_dev $btrfs_path"
+
+ if [ "$fs_type" == "btrfs" ]; then
+ echo Mounting Btrfs on $host:$fs_path
+ do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t btrfs $fs_opt $first_dev $fs_path"
+ else
+ echo Mounting $fs_type on $host:$fs_path
+ do_root_cmd "modprobe $fs_type ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t $fs_type $fs_opt $first_dev $fs_path"
+ fi
fi
echo Starting Ceph $name on $host...
mkdir -p $RUN_DIR
@@ -289,9 +329,9 @@ for name in $what; do
stop_daemon $name ceph-$type $pid_file
[ -n "$post_stop" ] && do_cmd "$post_stop"
[ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile
- if [ $dobtrfsumount -eq 1 ] && [ -n "$btrfs_devs" ]; then
- echo Unmounting Btrfs on $host:$btrfs_path
- do_root_cmd "umount $btrfs_path || true"
+ if [ $dofsumount -eq 1 ] && [ -n "$fs_devs" ]; then
+ echo Unmounting OSD volume on $host:$fs_path
+ do_root_cmd "umount $fs_path || true"
fi
;;
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 4642a1346ff..b0fd9ec0c15 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -1754,7 +1754,7 @@ CInode* Server::prepare_new_inode(MDRequest *mdr, CDir *dir, inodeno_t useino, u
in->inode.gid = diri->inode.gid;
if (S_ISDIR(mode)) {
dout(10) << " new dir also sticky" << dendl;
- mode |= S_ISGID;
+ in->inode.mode |= S_ISGID;
}
} else
in->inode.gid = mdr->client_request->get_caller_gid();
diff --git a/src/mkcephfs.in b/src/mkcephfs.in
index aae616c68a0..1cb135e7929 100644
--- a/src/mkcephfs.in
+++ b/src/mkcephfs.in
@@ -60,7 +60,7 @@ else
fi
usage_exit() {
- echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkbtrfs]"
+ echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkfs]"
echo " to generate a new ceph cluster on all nodes; for advanced usage see man page"
echo " ** be careful, this WILL clobber old data; check your ceph.conf carefully **"
exit
@@ -70,7 +70,7 @@ usage_exit() {
allhosts=0
-mkbtrfs=0
+mkfs=0
preparemonmap=0
prepareosdfs=""
initdaemon=""
@@ -130,8 +130,8 @@ case $1 in
preparemon=1
manual_action=1
;;
- --mkbtrfs)
- mkbtrfs=1
+ --mkbtrfs | --mkfs)
+ mkfs=1
;;
--no-copy-conf)
nocopyconf=1
@@ -307,21 +307,48 @@ if [ -n "$prepareosdfs" ]; then
get_conf osd_data "/var/lib/ceph/osd/ceph-$id" "osd data"
get_conf osd_journal "$osd_data/journal" "osd journal"
- get_conf btrfs_path "$osd_data" "btrfs path" # mount point defaults so osd data
- get_conf btrfs_devs "" "btrfs devs"
-
- if [ -z "$btrfs_devs" ]; then
- echo "no btrfs devs defined for $name"
- exit 0
+ get_conf fs_path "$osd_data" "fs path" # mount point defaults so osd data
+ get_conf fs_devs "" "devs"
+ get_conf fs_type "" "osd mkfs type"
+
+ if [ -z "$fs_devs" ]; then
+ # try to fallback to old keys
+ get_conf tmp_btrfs_devs "" "btrfs devs"
+ if [ -n "$tmp_btrfs_devs" ]; then
+ fs_devs="$tmp_btrfs_devs"
+ else
+ echo "no devs defined for $name"
+ exit 0
+ fi
+ fi
+ if [ -z "$fs_type" ]; then
+ # try to fallback to to old keys
+ get_conf tmp_devs "" "btrfs devs"
+ if [ -n "$tmp_devs" ]; then
+ fs_type = "btrfs"
+ else
+ echo No filesystem type defined!
+ exit 0
+ fi
fi
- first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
- get_conf btrfs_opt "noatime" "btrfs options"
- [ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
+ first_dev=`echo $fs_devs | cut '-d ' -f 1`
+ get_conf fs_opt "" "osd mount options $fs_type"
+ if [ -z "$fs_opt" ]; then
+ if [ "$fs_type" = "btrfs" ]; then
+ #try to fallback to old keys
+ get_conf fs_opt "" "btrfs options"
+ fi
+ if [ -z "$fs_opt" ]; then
+ #fallback to use at least rw,noatime
+ fs_opt="rw,noatime"
+ fi
+ fi
+ [ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
get_conf osd_user "root" "user"
- if [ -n "$osd_journal" ] && echo "$btrfs_devs" | grep -q -w "$osd_journal" ; then
- echo "ERROR: osd journal device ($osd_journal) also used by btrfs devs ($btrfs_devs)"
+ if [ -n "$osd_journal" ] && echo "fs_devs" | grep -q -w "$osd_journal" ; then
+ echo "ERROR: osd journal device ($osd_journal) also used by devs ($fs_devs)"
exit 1
fi
@@ -331,19 +358,22 @@ if [ -n "$prepareosdfs" ]; then
test -d $osd_journal || mkdir -p `dirname $osd_journal`
fi
- umount $btrfs_path || true
- for f in $btrfs_devs ; do
+ umount $fs_path || true
+ for f in $fs_devs ; do
umount $f || true
done
- modprobe btrfs || true
- mkfs.btrfs $btrfs_devs
- btrfs device scan || btrfsctl -a
- sync # seems to fix problems for some people...
- mount -t btrfs $btrfs_opt $first_dev $btrfs_path
- chown $osd_user $btrfs_path
- chmod +w $btrfs_path
-
+ get_conf mkfs_opt "" "osd mkfs options $fs_type"
+ if [ "$fs_type" == "xfs" ] && [ -z "$mkfs_opt" ]; then
+ echo Xfs filesystem found add missing -f mkfs option!
+ mkfs_opt="-f"
+ fi
+ modprobe $fs_type || true
+ mkfs.$fs_type $mkfs_opt $fs_devs
+ mount -t $fs_type $fs_opt $first_dev $fs_path
+ chown $osd_user $fs_path
+ chmod +w $fs_path
+
exit 0
fi
@@ -460,7 +490,7 @@ if [ $allhosts -eq 1 ]; then
fi
fi
- if [ $mkbtrfs -eq 1 ] && [ "$type" = "osd" ]; then
+ if [ $mkfs -eq 1 ] && [ "$type" = "osd" ]; then
do_root_cmd "$0 -d $rdir --prepare-osdfs $name"
fi
diff --git a/src/mon/AuthMonitor.cc b/src/mon/AuthMonitor.cc
index 030153e17df..c43d7e988db 100644
--- a/src/mon/AuthMonitor.cc
+++ b/src/mon/AuthMonitor.cc
@@ -549,7 +549,8 @@ void AuthMonitor::import_keyring(KeyRing& keyring)
auth_inc.name = p->first;
auth_inc.auth = p->second;
auth_inc.op = KeyServerData::AUTH_INC_ADD;
- dout(10) << " importing " << auth_inc.name << " " << auth_inc.auth << dendl;
+ dout(10) << " importing " << auth_inc.name << dendl;
+ dout(30) << " " << auth_inc.auth << dendl;
push_cephx_inc(auth_inc);
}
}
@@ -627,7 +628,8 @@ bool AuthMonitor::prepare_command(MMonCommand *m)
for (unsigned i=3; i+1<m->cmd.size(); i += 2)
::encode(m->cmd[i+1], auth_inc.auth.caps[m->cmd[i]]);
- dout(10) << " importing " << auth_inc.name << " " << auth_inc.auth << dendl;
+ dout(10) << " importing " << auth_inc.name << dendl;
+ dout(30) << " " << auth_inc.auth << dendl;
push_cephx_inc(auth_inc);
ss << "added key for " << auth_inc.name;
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 469cbd03aa5..4a64e48de8d 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -207,14 +207,26 @@ void Monitor::recovered_leader(int id)
require_gv_onwire();
}
- for (vector<Paxos*>::iterator p = paxos.begin(); p != paxos.end(); p++)
+ for (vector<Paxos*>::iterator p = paxos.begin(); p != paxos.end(); p++) {
+ if (!(*p)->is_active())
+ continue;
finish_contexts(g_ceph_context, (*p)->waiting_for_active);
- for (vector<Paxos*>::iterator p = paxos.begin(); p != paxos.end(); p++)
+ }
+ for (vector<Paxos*>::iterator p = paxos.begin(); p != paxos.end(); p++) {
+ if (!(*p)->is_active())
+ continue;
finish_contexts(g_ceph_context, (*p)->waiting_for_commit);
- for (vector<Paxos*>::iterator p = paxos.begin(); p != paxos.end(); p++)
+ }
+ for (vector<Paxos*>::iterator p = paxos.begin(); p != paxos.end(); p++) {
+ if (!(*p)->is_readable())
+ continue;
finish_contexts(g_ceph_context, (*p)->waiting_for_readable);
- for (vector<Paxos*>::iterator p = paxos.begin(); p != paxos.end(); p++)
+ }
+ for (vector<Paxos*>::iterator p = paxos.begin(); p != paxos.end(); p++) {
+ if (!(*p)->is_writeable())
+ continue;
finish_contexts(g_ceph_context, (*p)->waiting_for_writeable);
+ }
}
}
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 2bdfbc4b5eb..dcf9380613b 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -991,12 +991,17 @@ bool PGMonitor::preprocess_command(MMonCommand *m)
r = -EINVAL;
if (pgid.parse(m->cmd[2].c_str())) {
vector<int> up, acting;
- pg_t mpgid = mon->osdmon()->osdmap.raw_pg_to_pg(pgid);
- mon->osdmon()->osdmap.pg_to_up_acting_osds(pgid, up, acting);
- ss << "osdmap e" << mon->osdmon()->osdmap.get_epoch()
- << " pg " << pgid << " (" << mpgid << ")"
- << " -> up " << up << " acting " << acting;
- r = 0;
+ if (mon->osdmon()->osdmap.have_pg_pool(pgid.pool())) {
+ pg_t mpgid = mon->osdmon()->osdmap.raw_pg_to_pg(pgid);
+ mon->osdmon()->osdmap.pg_to_up_acting_osds(pgid, up, acting);
+ ss << "osdmap e" << mon->osdmon()->osdmap.get_epoch()
+ << " pg " << pgid << " (" << mpgid << ")"
+ << " -> up " << up << " acting " << acting;
+ r = 0;
+ } else {
+ r = -ENOENT;
+ ss << "pg '" << m->cmd[2] << "' does not exist";
+ }
} else
ss << "invalid pgid '" << m->cmd[2] << "'";
}
diff --git a/src/msg/Accepter.cc b/src/msg/Accepter.cc
index b95d26afb08..ac180e21441 100644
--- a/src/msg/Accepter.cc
+++ b/src/msg/Accepter.cc
@@ -138,9 +138,11 @@ int Accepter::bind(const entity_addr_t &bind_addr, int avoid_port1, int avoid_po
assert(msgr->get_need_addr()); // should still be true.
if (msgr->get_myaddr().get_port() == 0) {
- listen_addr.nonce = nonce;
msgr->set_myaddr(listen_addr);
}
+ entity_addr_t addr = msgr->get_myaddr();
+ addr.nonce = nonce;
+ msgr->set_myaddr(addr);
msgr->init_local_connection();
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index b8f01c2e1ac..1d6797c2b32 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -25,6 +25,7 @@
#if defined(__linux__)
#include <linux/fs.h>
+#include <syscall.h>
#endif
#include <iostream>
@@ -1227,8 +1228,17 @@ int FileStore::_detect_fs()
dout(0) << "mount syncfs(2) syscall supported by glibc BUT NOT the kernel" << dendl;
}
#else
+#ifdef SYS_syncfs
+ if (syscall(SYS_syncfs, fd) == 0) {
+ dout(0) << "mount syscall(SYS_syncfs, fd) fully supported" << dendl;
+ have_syncfs = true;
+ } else {
+ dout(0) << "mount syscall(SYS_syncfs, fd) supported by libc BUT NOT the kernel" << dendl;
+ }
+#else
dout(0) << "mount syncfs(2) syscall not support by glibc" << dendl;
#endif
+#endif
if (!have_syncfs) {
if (btrfs) {
dout(0) << "mount no syncfs(2), but the btrfs SYNC ioctl will suffice" << dendl;
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 8edf3a55c4b..c933bad164a 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -703,6 +703,7 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
if (r == -ENOENT) {
ldout(cct, 10) << "bh_read_finish removing " << *bh << dendl;
bh_remove(ob, bh);
+ delete bh;
continue;
}
diff --git a/src/osdc/Striper.cc b/src/osdc/Striper.cc
index cdbfe61674d..26e2b917bb7 100644
--- a/src/osdc/Striper.cc
+++ b/src/osdc/Striper.cc
@@ -231,12 +231,15 @@ void Striper::StripedReadResult::add_partial_sparse_result(CephContext *cct,
if (s->first > bl_off) {
// gap in sparse read result
pair<bufferlist, uint64_t>& r = partial[tofs];
- size_t gap = s->first - bl_off;
+ size_t gap = MIN(s->first - bl_off, tlen);
ldout(cct, 20) << " s gap " << gap << ", skipping" << dendl;
r.second = gap;
bl_off += gap;
tofs += gap;
tlen -= gap;
+ if (tlen == 0) {
+ continue;
+ }
}
assert(s->first <= bl_off);
diff --git a/src/rbd.cc b/src/rbd.cc
index fa7648a2148..09d506f1ef7 100644
--- a/src/rbd.cc
+++ b/src/rbd.cc
@@ -472,7 +472,11 @@ static int do_purge_snaps(librbd::Image& image)
}
for (size_t i = 0; i < snaps.size(); ++i) {
- image.snap_remove(snaps[i].name.c_str());
+ r = image.snap_remove(snaps[i].name.c_str());
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
pc.update_progress(i + 1, snaps.size());
}
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 5a5b7406aaf..665ac5516e8 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -1361,25 +1361,8 @@ int main(int argc, char **argv)
goto next;
if (show_log_entries) {
- formatter->open_object_section("log_entry");
- formatter->dump_string("bucket", entry.bucket);
- entry.time.gmtime(formatter->dump_stream("time")); // UTC
- entry.time.localtime(formatter->dump_stream("time_local"));
- formatter->dump_string("remote_addr", entry.remote_addr);
- if (entry.object_owner.length())
- formatter->dump_string("object_owner", entry.object_owner);
- formatter->dump_string("user", entry.user);
- formatter->dump_string("operation", entry.op);
- formatter->dump_string("uri", entry.uri);
- formatter->dump_string("http_status", entry.http_status);
- formatter->dump_string("error_code", entry.error_code);
- formatter->dump_int("bytes_sent", entry.bytes_sent);
- formatter->dump_int("bytes_received", entry.bytes_received);
- formatter->dump_int("object_size", entry.obj_size);
- formatter->dump_int("total_time", total_time);
- formatter->dump_string("user_agent", entry.user_agent);
- formatter->dump_string("referrer", entry.referrer);
- formatter->close_section();
+
+ rgw_format_ops_log_entry(entry, formatter);
formatter->flush(cout);
}
next:
diff --git a/src/rgw/rgw_client_io.cc b/src/rgw/rgw_client_io.cc
index 740418c9ab1..46385f41f33 100644
--- a/src/rgw/rgw_client_io.cc
+++ b/src/rgw/rgw_client_io.cc
@@ -53,8 +53,7 @@ int RGWClientIO::read(char *buf, int max, int *actual)
*actual = ret;
- if (account)
- bytes_received += *actual;
+ bytes_received += *actual;
return 0;
}
diff --git a/src/rgw/rgw_client_io.h b/src/rgw/rgw_client_io.h
index d4842e97e6e..546b16d51f7 100644
--- a/src/rgw/rgw_client_io.h
+++ b/src/rgw/rgw_client_io.h
@@ -3,6 +3,8 @@
#include <stdlib.h>
+#include "include/types.h"
+
class RGWClientIO {
bool account;
@@ -27,6 +29,9 @@ public:
void set_account(bool _account) {
account = _account;
}
+
+ uint64_t get_bytes_sent() { return bytes_sent; }
+ uint64_t get_bytes_received() { return bytes_received; }
};
#endif
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index db8013e6120..35b31be60c0 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -106,8 +106,6 @@ req_state::req_state(CephContext *_cct, struct RGWEnv *e) : cct(_cct), cio(NULL)
object = NULL;
header_ended = false;
- bytes_sent = 0;
- bytes_received = 0;
obj_size = 0;
prot_flags = 0;
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index c55907a38d3..f43a2c78a1f 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -582,8 +582,6 @@ struct req_state {
struct rgw_err err;
bool expect_cont;
bool header_ended;
- uint64_t bytes_sent; // bytes sent as a response, excluding header
- uint64_t bytes_received; // data received
uint64_t obj_size;
bool enable_ops_log;
bool enable_usage_log;
diff --git a/src/rgw/rgw_gc.cc b/src/rgw/rgw_gc.cc
index 361e1b04ae0..d7861e61250 100644
--- a/src/rgw/rgw_gc.cc
+++ b/src/rgw/rgw_gc.cc
@@ -34,9 +34,7 @@ void RGWGC::initialize(CephContext *_cct, RGWRados *_store) {
void RGWGC::finalize()
{
- for (int i = 0; i < max_objs; i++) {
- delete[] obj_names;
- }
+ delete[] obj_names;
}
int RGWGC::tag_index(const string& tag)
diff --git a/src/rgw/rgw_gc.h b/src/rgw/rgw_gc.h
index db1753a100f..b19afc55dd5 100644
--- a/src/rgw/rgw_gc.h
+++ b/src/rgw/rgw_gc.h
@@ -36,6 +36,10 @@ class RGWGC {
GCWorker *worker;
public:
RGWGC() : cct(NULL), store(NULL), max_objs(0), obj_names(NULL), worker(NULL) {}
+ ~RGWGC() {
+ stop_processor();
+ finalize();
+ }
void add_chain(librados::ObjectWriteOperation& op, cls_rgw_obj_chain& chain, const string& tag);
int send_chain(cls_rgw_obj_chain& chain, const string& tag, bool sync);
diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc
index 5406a2c924b..e999f623a01 100644
--- a/src/rgw/rgw_log.cc
+++ b/src/rgw/rgw_log.cc
@@ -1,10 +1,13 @@
#include "common/Clock.h"
#include "common/Timer.h"
#include "common/utf8.h"
+#include "common/OutputDataSocket.h"
+#include "common/Formatter.h"
#include "rgw_log.h"
#include "rgw_acl.h"
#include "rgw_rados.h"
+#include "rgw_client_io.h"
#define dout_subsys ceph_subsys_rgw
@@ -175,7 +178,10 @@ static void log_usage(struct req_state *s, const string& op_name)
rgw_usage_log_entry entry(user, s->bucket.name);
- rgw_usage_data data(s->bytes_sent, s->bytes_received);
+ uint64_t bytes_sent = s->cio->get_bytes_sent();
+ uint64_t bytes_received = s->cio->get_bytes_received();
+
+ rgw_usage_data data(bytes_sent, bytes_received);
data.ops = 1;
if (!s->err.is_err())
@@ -188,7 +194,67 @@ static void log_usage(struct req_state *s, const string& op_name)
usage_logger->insert(ts, entry);
}
-int rgw_log_op(RGWRados *store, struct req_state *s, const string& op_name)
+void rgw_format_ops_log_entry(struct rgw_log_entry& entry, Formatter *formatter)
+{
+ formatter->open_object_section("log_entry");
+ formatter->dump_string("bucket", entry.bucket);
+ entry.time.gmtime(formatter->dump_stream("time")); // UTC
+ entry.time.localtime(formatter->dump_stream("time_local"));
+ formatter->dump_string("remote_addr", entry.remote_addr);
+ if (entry.object_owner.length())
+ formatter->dump_string("object_owner", entry.object_owner);
+ formatter->dump_string("user", entry.user);
+ formatter->dump_string("operation", entry.op);
+ formatter->dump_string("uri", entry.uri);
+ formatter->dump_string("http_status", entry.http_status);
+ formatter->dump_string("error_code", entry.error_code);
+ formatter->dump_int("bytes_sent", entry.bytes_sent);
+ formatter->dump_int("bytes_received", entry.bytes_received);
+ formatter->dump_int("object_size", entry.obj_size);
+ uint64_t total_time = entry.total_time.sec() * 1000000LL * entry.total_time.usec();
+
+ formatter->dump_int("total_time", total_time);
+ formatter->dump_string("user_agent", entry.user_agent);
+ formatter->dump_string("referrer", entry.referrer);
+ formatter->close_section();
+}
+
+void OpsLogSocket::formatter_to_bl(bufferlist& bl)
+{
+ stringstream ss;
+ formatter->flush(ss);
+ const string& s = ss.str();
+
+ bl.append(s);
+}
+
+void OpsLogSocket::init_connection(bufferlist& bl)
+{
+ bl.append("[");
+}
+
+OpsLogSocket::OpsLogSocket(CephContext *cct, uint64_t _backlog) : OutputDataSocket(cct, _backlog)
+{
+ formatter = new JSONFormatter;
+ delim.append(",\n");
+}
+
+OpsLogSocket::~OpsLogSocket()
+{
+ delete formatter;
+}
+
+void OpsLogSocket::log(struct rgw_log_entry& entry)
+{
+ bufferlist bl;
+
+ rgw_format_ops_log_entry(entry, formatter);
+ formatter_to_bl(bl);
+
+ append_output(bl);
+}
+
+int rgw_log_op(RGWRados *store, struct req_state *s, const string& op_name, OpsLogSocket *olog)
{
struct rgw_log_entry entry;
string bucket_id;
@@ -240,10 +306,13 @@ int rgw_log_op(RGWRados *store, struct req_state *s, const string& op_name)
entry.object_owner = s->object_acl->get_owner().get_id();
entry.bucket_owner = s->bucket_owner;
+ uint64_t bytes_sent = s->cio->get_bytes_sent();
+ uint64_t bytes_received = s->cio->get_bytes_received();
+
entry.time = s->time;
entry.total_time = ceph_clock_now(s->cct) - s->time;
- entry.bytes_sent = s->bytes_sent;
- entry.bytes_received = s->bytes_received;
+ entry.bytes_sent = bytes_sent;
+ entry.bytes_received = bytes_received;
if (s->err.http_ret) {
char buf[16];
snprintf(buf, sizeof(buf), "%d", s->err.http_ret);
@@ -263,19 +332,27 @@ int rgw_log_op(RGWRados *store, struct req_state *s, const string& op_name)
gmtime_r(&t, &bdt);
else
localtime_r(&t, &bdt);
-
- string oid = render_log_object_name(s->cct->_conf->rgw_log_object_name, &bdt,
- s->bucket.bucket_id, entry.bucket.c_str());
- rgw_obj obj(store->params.log_pool, oid);
+ int ret = 0;
+
+ if (s->cct->_conf->rgw_ops_log_rados) {
+ string oid = render_log_object_name(s->cct->_conf->rgw_log_object_name, &bdt,
+ s->bucket.bucket_id, entry.bucket.c_str());
+
+ rgw_obj obj(store->params.log_pool, oid);
- int ret = store->append_async(obj, bl.length(), bl);
- if (ret == -ENOENT) {
- ret = store->create_pool(store->params.log_pool);
- if (ret < 0)
- goto done;
- // retry
ret = store->append_async(obj, bl.length(), bl);
+ if (ret == -ENOENT) {
+ ret = store->create_pool(store->params.log_pool);
+ if (ret < 0)
+ goto done;
+ // retry
+ ret = store->append_async(obj, bl.length(), bl);
+ }
+ }
+
+ if (olog) {
+ olog->log(entry);
}
done:
if (ret < 0)
diff --git a/src/rgw/rgw_log.h b/src/rgw/rgw_log.h
index f832c1cadeb..823f0b1767f 100644
--- a/src/rgw/rgw_log.h
+++ b/src/rgw/rgw_log.h
@@ -3,6 +3,8 @@
#include "rgw_common.h"
#include "include/utime.h"
+#include "common/Formatter.h"
+#include "common/OutputDataSocket.h"
class RGWRados;
@@ -115,11 +117,27 @@ struct rgw_intent_log_entry {
};
WRITE_CLASS_ENCODER(rgw_intent_log_entry)
-int rgw_log_op(RGWRados *store, struct req_state *s, const string& op_name);
+class OpsLogSocket : public OutputDataSocket {
+ Formatter *formatter;
+
+ void formatter_to_bl(bufferlist& bl);
+
+protected:
+ void init_connection(bufferlist& bl);
+
+public:
+ OpsLogSocket(CephContext *cct, uint64_t _backlog);
+ ~OpsLogSocket();
+
+ void log(struct rgw_log_entry& entry);
+};
+
+int rgw_log_op(RGWRados *store, struct req_state *s, const string& op_name, OpsLogSocket *olog);
int rgw_log_intent(RGWRados *store, rgw_obj& obj, RGWIntentEvent intent, const utime_t& timestamp, bool utc);
int rgw_log_intent(RGWRados *store, struct req_state *s, rgw_obj& obj, RGWIntentEvent intent);
void rgw_log_usage_init(CephContext *cct, RGWRados *store);
void rgw_log_usage_finalize();
+void rgw_format_ops_log_entry(struct rgw_log_entry& entry, Formatter *formatter);
#endif
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index 944b59a5c8d..5f52dde228d 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -61,20 +61,12 @@ static sighandler_t sighandler_usr1;
static sighandler_t sighandler_alrm;
static sighandler_t sighandler_term;
+class RGWProcess;
-#define SOCKET_BACKLOG 1024
+static RGWProcess *pprocess = NULL;
-static void godown_handler(int signum)
-{
- FCGX_ShutdownPending();
- signal(signum, sighandler_usr1);
- alarm(5);
-}
-static void godown_alarm(int signum)
-{
- _exit(0);
-}
+#define SOCKET_BACKLOG 1024
struct RGWRequest
{
@@ -127,10 +119,12 @@ struct RGWRequest
class RGWProcess {
RGWRados *store;
+ OpsLogSocket *olog;
deque<RGWRequest *> m_req_queue;
ThreadPool m_tp;
Throttle req_throttle;
RGWREST *rest;
+ int sock_fd;
struct RGWWQ : public ThreadPool::WorkQueue<RGWRequest> {
RGWProcess *process;
@@ -185,20 +179,25 @@ class RGWProcess {
uint64_t max_req_id;
public:
- RGWProcess(CephContext *cct, RGWRados *rgwstore, int num_threads, RGWREST *_rest)
- : store(rgwstore), m_tp(cct, "RGWProcess::m_tp", num_threads),
+ RGWProcess(CephContext *cct, RGWRados *rgwstore, OpsLogSocket *_olog, int num_threads, RGWREST *_rest)
+ : store(rgwstore), olog(_olog), m_tp(cct, "RGWProcess::m_tp", num_threads),
req_throttle(cct, "rgw_ops", num_threads * 2),
- rest(_rest),
+ rest(_rest), sock_fd(-1),
req_wq(this, g_conf->rgw_op_thread_timeout,
g_conf->rgw_op_thread_suicide_timeout, &m_tp),
max_req_id(0) {}
void run();
void handle_request(RGWRequest *req);
+
+ void close_fd() {
+ if (sock_fd >= 0)
+ close(sock_fd);
+ }
};
void RGWProcess::run()
{
- int s = 0;
+ sock_fd = 0;
if (!g_conf->rgw_socket_path.empty()) {
string path_str = g_conf->rgw_socket_path;
@@ -216,9 +215,9 @@ void RGWProcess::run()
}
const char *path = path_str.c_str();
- s = FCGX_OpenSocket(path, SOCKET_BACKLOG);
- if (s < 0) {
- dout(0) << "ERROR: FCGX_OpenSocket (" << path << ") returned " << s << dendl;
+ sock_fd = FCGX_OpenSocket(path, SOCKET_BACKLOG);
+ if (sock_fd < 0) {
+ dout(0) << "ERROR: FCGX_OpenSocket (" << path << ") returned " << sock_fd << dendl;
return;
}
if (chmod(path, 0777) < 0) {
@@ -232,7 +231,7 @@ void RGWProcess::run()
RGWRequest *req = new RGWRequest;
req->id = ++max_req_id;
dout(10) << "allocated request req=" << hex << req << dec << dendl;
- FCGX_InitRequest(&req->fcgx, s, 0);
+ FCGX_InitRequest(&req->fcgx, sock_fd, 0);
req_throttle.get(1);
int ret = FCGX_Accept_r(&req->fcgx);
if (ret < 0) {
@@ -248,6 +247,19 @@ void RGWProcess::run()
m_tp.stop();
}
+static void godown_handler(int signum)
+{
+ FCGX_ShutdownPending();
+ pprocess->close_fd();
+ signal(signum, sighandler_usr1);
+ alarm(5);
+}
+
+static void godown_alarm(int signum)
+{
+ _exit(0);
+}
+
static int call_log_intent(RGWRados *store, void *ctx, rgw_obj& obj, RGWIntentEvent intent)
{
struct req_state *s = (struct req_state *)ctx;
@@ -331,7 +343,7 @@ void RGWProcess::handle_request(RGWRequest *req)
op->execute();
op->complete();
done:
- rgw_log_op(store, s, (op ? op->name() : "unknown"));
+ rgw_log_op(store, s, (op ? op->name() : "unknown"), olog);
int http_ret = s->err.http_ret;
@@ -485,8 +497,18 @@ int main(int argc, const char **argv)
rest.register_resource(g_conf->rgw_admin_entry, admin_resource);
}
- RGWProcess process(g_ceph_context, store, g_conf->rgw_thread_pool_size, &rest);
- process.run();
+ OpsLogSocket *olog = NULL;
+
+ if (!g_conf->rgw_ops_log_socket_path.empty()) {
+ olog = new OpsLogSocket(g_ceph_context, g_conf->rgw_ops_log_data_backlog);
+ olog->init(g_conf->rgw_ops_log_socket_path);
+ }
+
+ pprocess = new RGWProcess(g_ceph_context, store, olog, g_conf->rgw_thread_pool_size, &rest);
+
+ pprocess->run();
+
+ delete pprocess;
if (do_swift) {
swift_finalize();
@@ -494,12 +516,22 @@ int main(int argc, const char **argv)
rgw_log_usage_finalize();
+ delete olog;
+
rgw_perf_stop(g_ceph_context);
unregister_async_signal_handler(SIGHUP, sighup_handler);
RGWStoreManager::close_storage(store);
+ rgw_tools_cleanup();
+ curl_global_cleanup();
+ g_ceph_context->put();
+
+ shutdown_async_signal_handler();
+
+ ceph::crypto::shutdown();
+
return 0;
}
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 5da22d2ad7f..49863c3046d 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -1747,7 +1747,6 @@ void RGWPutACLs::execute()
RGWACLXMLParser_S3 parser(s->cct);
RGWAccessControlPolicy_S3 new_policy(s->cct);
stringstream ss;
- char *orig_data = data;
char *new_data = NULL;
ACLOwner owner;
rgw_obj obj;
@@ -1756,27 +1755,29 @@ void RGWPutACLs::execute()
if (!parser.init()) {
ret = -EINVAL;
- goto done;
+ return;
}
owner.set_id(s->user.user_id);
owner.set_name(s->user.display_name);
- if (get_params() < 0)
- goto done;
+ ret = get_params();
+ if (ret < 0)
+ return;
ldout(s->cct, 15) << "read len=" << len << " data=" << (data ? data : "") << dendl;
if (!s->canned_acl.empty() && len) {
ret = -EINVAL;
- goto done;
+ return;
}
if (!s->canned_acl.empty()) {
ret = get_canned_policy(owner, ss);
if (ret < 0)
- goto done;
+ return;
new_data = strdup(ss.str().c_str());
+ free(data);
data = new_data;
len = ss.str().size();
}
@@ -1784,12 +1785,12 @@ void RGWPutACLs::execute()
if (!parser.parse(data, len, 1)) {
ret = -EACCES;
- goto done;
+ return;
}
policy = (RGWAccessControlPolicy_S3 *)parser.find_first("AccessControlPolicy");
if (!policy) {
ret = -EINVAL;
- goto done;
+ return;
}
if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15)) {
@@ -1800,7 +1801,7 @@ void RGWPutACLs::execute()
ret = policy->rebuild(store, &owner, new_policy);
if (ret < 0)
- goto done;
+ return;
if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15)) {
ldout(s->cct, 15) << "New AccessControlPolicy:";
@@ -1812,10 +1813,6 @@ void RGWPutACLs::execute()
obj.init(s->bucket, s->object_str);
store->set_atomic(s->obj_ctx, obj);
ret = store->set_attr(s->obj_ctx, obj, RGW_ATTR_ACL, bl);
-
-done:
- free(orig_data);
- free(new_data);
}
int RGWInitMultipart::verify_permission()
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index 52676b92d71..043bad03f3f 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -493,6 +493,9 @@ public:
len = 0;
data = NULL;
}
+ virtual ~RGWPutACLs() {
+ free(data);
+ }
int verify_permission();
void execute();
@@ -540,6 +543,9 @@ public:
data = NULL;
len = 0;
}
+ virtual ~RGWCompleteMultipart() {
+ free(data);
+ }
int verify_permission();
void execute();
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 6255010785a..aa25d507dfb 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -166,8 +166,11 @@ void RGWRadosCtx::set_prefetch_data(rgw_obj& obj) {
void RGWRados::finalize()
{
- if (use_gc_thread)
+ if (use_gc_thread) {
gc->stop_processor();
+ delete gc;
+ gc = NULL;
+ }
}
/**
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index 4e0b52d97b7..d8963e2a91e 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -382,7 +382,12 @@ public:
RGWRadosParams params;
- virtual ~RGWRados() {}
+ virtual ~RGWRados() {
+ if (rados) {
+ rados->shutdown();
+ delete rados;
+ }
+ }
void tick();
diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc
index 4201bd42d06..b6d9f284771 100644
--- a/src/rgw/rgw_tools.cc
+++ b/src/rgw/rgw_tools.cc
@@ -161,3 +161,8 @@ int rgw_tools_init(CephContext *cct)
return 0;
}
+
+void rgw_tools_cleanup()
+{
+ ext_mime_map.clear();
+}
diff --git a/src/rgw/rgw_tools.h b/src/rgw/rgw_tools.h
index 7860ea3819e..6553d9d1465 100644
--- a/src/rgw/rgw_tools.h
+++ b/src/rgw/rgw_tools.h
@@ -12,6 +12,7 @@ int rgw_put_system_obj(RGWRados *rgwstore, rgw_bucket& bucket, string& oid, cons
int rgw_get_obj(RGWRados *rgwstore, void *ctx, rgw_bucket& bucket, string& key, bufferlist& bl, map<string, bufferlist> *pattrs = NULL);
int rgw_tools_init(CephContext *cct);
+void rgw_tools_cleanup();
const char *rgw_find_mime_by_ext(string& ext);
#endif
diff --git a/src/rgw/rgw_xml.cc b/src/rgw/rgw_xml.cc
index c94b82e30fa..4347b06115c 100644
--- a/src/rgw/rgw_xml.cc
+++ b/src/rgw/rgw_xml.cc
@@ -131,13 +131,16 @@ static void xml_start(void *data, const char *el, const char **attr) {
}
RGWXMLParser::
-RGWXMLParser() : p(NULL), buf(NULL), buf_len(0), cur_obj(NULL), success(true)
+RGWXMLParser() : buf(NULL), buf_len(0), cur_obj(NULL), success(true)
{
+ p = XML_ParserCreate(NULL);
}
RGWXMLParser::
~RGWXMLParser()
{
+ XML_ParserFree(p);
+
free(buf);
vector<XMLObj *>::iterator iter;
for (iter = objs.begin(); iter != objs.end(); ++iter) {
@@ -194,7 +197,6 @@ void RGWXMLParser::handle_data(const char *s, int len)
bool RGWXMLParser::init()
{
- p = XML_ParserCreate(NULL);
if (!p) {
return false;
}
@@ -221,8 +223,5 @@ bool RGWXMLParser::parse(const char *_buf, int len, int done)
success = false;
}
- if (done || !success)
- XML_ParserFree(p);
-
return success;
}
diff --git a/src/sample.ceph.conf b/src/sample.ceph.conf
index 88f7f02d992..dd121f2ee5c 100644
--- a/src/sample.ceph.conf
+++ b/src/sample.ceph.conf
@@ -131,27 +131,33 @@
;debug filestore = 20
;debug journal = 20
+ ; The filesystem used on the volumes
+ osd mkfs type = btrfs
+ ; If you want to specify some other mount options, you can do so.
+ ; for other filesystems use 'osd mount options $fstype'
+ osd mount options btrfs = rw,noatime
+ ; The options used to format the filesystem via mkfs.$fstype
+ ; for other filesystems use 'osd mkfs options $fstype'
+ ; osd mkfs options btrfs =
+
+
[osd.0]
host = delta
- ; if 'btrfs devs' is not specified, you're responsible for
+ ; if 'devs' is not specified, you're responsible for
; setting up the 'osd data' dir. if it is not btrfs, things
; will behave up until you try to recover from a crash (which
; usually fine for basic testing).
- btrfs devs = /dev/sdx
-
- ; If you want to specify some other mount options, you can do so.
- ; The default values are rw,noatime
- ;btrfs options = rw,noatime
+ devs = /dev/sdx
[osd.1]
host = epsilon
- btrfs devs = /dev/sdy
+ devs = /dev/sdy
[osd.2]
host = zeta
- btrfs devs = /dev/sdx
+ devs = /dev/sdx
[osd.3]
host = eta
- btrfs devs = /dev/sdy
+ devs = /dev/sdy
diff --git a/src/test/cli/osdmaptool/ceph.conf.withracks b/src/test/cli/osdmaptool/ceph.conf.withracks
index 1e14411c9da..09399e9554a 100644
--- a/src/test/cli/osdmaptool/ceph.conf.withracks
+++ b/src/test/cli/osdmaptool/ceph.conf.withracks
@@ -42,7 +42,9 @@
keyring = /mnt/osd.$id/keyring
osd data = /mnt/osd.$id
osd journal = /dev/disk/by-label/osd.$id.journal
- btrfs devs = /dev/disk/by-label/osd.$id.data
+ osd mkfs type = btrfs
+ osd mount options btrfs = rw,noatime
+ devs = /dev/disk/by-label/osd.$id.data
; temp sage
debug osd = 20
debug ms = 1
diff --git a/src/test/librbd/fsx.c b/src/test/librbd/fsx.c
index f895b5ffa10..d884173b0cf 100644
--- a/src/test/librbd/fsx.c
+++ b/src/test/librbd/fsx.c
@@ -93,7 +93,8 @@ int logcount = 0; /* total ops */
#define OP_PUNCH_HOLE 6
/* rbd-specific operations */
#define OP_CLONE 7
-#define OP_MAX_FULL 8
+#define OP_FLATTEN 8
+#define OP_MAX_FULL 9
/* operation modifiers */
#define OP_CLOSEOPEN 100
@@ -334,6 +335,9 @@ logdump(void)
case OP_CLONE:
prt("CLONE");
break;
+ case OP_FLATTEN:
+ prt("FLATTEN");
+ break;
case OP_SKIPPED:
prt("SKIPPED (no operation)");
break;
@@ -408,7 +412,7 @@ report_failure(int status)
*(((unsigned char *)(cp)) + 1)))
void
-check_buffers(unsigned offset, unsigned size)
+check_buffers(char *good_buf, char *temp_buf, unsigned offset, unsigned size)
{
unsigned char c, t;
unsigned i = 0;
@@ -586,7 +590,7 @@ doread(unsigned offset, unsigned size)
ret, size);
report_failure(141);
}
- check_buffers(offset, size);
+ check_buffers(good_buf, temp_buf, offset, size);
}
@@ -789,6 +793,8 @@ void clone_imagename(char *buf, size_t len, int clones)
strncpy(buf, iname, len);
}
+void check_clone(int clonenum);
+
void
do_clone()
{
@@ -819,6 +825,10 @@ do_clone()
exit(163);
}
+ clone_imagename(imagename, sizeof(imagename), num_clones);
+ clone_imagename(lastimagename, sizeof(lastimagename),
+ num_clones - 1);
+
if ((ret = rbd_snap_create(image, "snap")) < 0) {
simple_err("do_clone: rbd create snap", ret);
exit(164);
@@ -829,10 +839,6 @@ do_clone()
exit(164);
}
- clone_imagename(imagename, sizeof(imagename), num_clones);
- clone_imagename(lastimagename, sizeof(lastimagename),
- num_clones - 1);
-
ret = rbd_clone2(ioctx, lastimagename, "snap", ioctx, imagename,
RBD_FEATURES_ALL, &order, stripe_unit, stripe_count);
if (ret < 0) {
@@ -844,58 +850,58 @@ do_clone()
simple_err("do_clone: rbd open", ret);
exit(166);
}
+
+ if (num_clones > 1)
+ check_clone(num_clones - 2);
}
void
-check_clones()
+check_clone(int clonenum)
{
- char filename[1024];
- char imagename[1024];
+ char filename[128];
+ char imagename[128];
int ret, fd;
rbd_image_t cur_image;
struct stat file_info;
- while (num_clones > 0) {
- prt("checking clone #%d\n", num_clones);
- --num_clones;
-
- clone_imagename(imagename, sizeof(imagename), num_clones);
- if ((ret = rbd_open(ioctx, imagename, &cur_image, NULL)) < 0) {
- simple_err("check_clones: rbd open", ret);
- exit(167);
- }
+ char *good_buf, *temp_buf;
- clone_filename(filename, sizeof(filename), num_clones + 1);
- if ((fd = open(filename, O_RDONLY)) < 0) {
- simple_err("check_clones: open", -errno);
- exit(168);
- }
+ clone_imagename(imagename, sizeof(imagename), clonenum);
+ if ((ret = rbd_open(ioctx, imagename, &cur_image, NULL)) < 0) {
+ simple_err("check_clone: rbd open", ret);
+ exit(167);
+ }
- prt("checking image %s against file %s\n", imagename, filename);
- if ((ret = fstat(fd, &file_info)) < 0) {
- simple_err("check_clones: fstat", -errno);
- exit(169);
- }
+ clone_filename(filename, sizeof(filename), clonenum + 1);
+ if ((fd = open(filename, O_RDONLY)) < 0) {
+ simple_err("check_clone: open", -errno);
+ exit(168);
+ }
- if ((ret = pread(fd, good_buf, file_info.st_size, 0)) < 0) {
- simple_err("check_clones: pread", -errno);
- exit(170);
- }
+ prt("checking clone #%d, image %s against file %s\n",
+ clonenum, imagename, filename);
+ if ((ret = fstat(fd, &file_info)) < 0) {
+ simple_err("check_clone: fstat", -errno);
+ exit(169);
+ }
- if ((ret = rbd_read(cur_image, 0, file_info.st_size, temp_buf)) < 0) {
- simple_err("check_clones: rbd_read", ret);
- exit(171);
- }
- close(fd);
- check_buffers(0, file_info.st_size);
-
- unlink(filename);
- /* remove the snapshot if it exists, ignore
- the error from the last clone. */
- rbd_snap_unprotect(cur_image, "snap");
- rbd_snap_remove(cur_image, "snap");
- rbd_close(cur_image);
- rbd_remove(ioctx, imagename);
+ good_buf = malloc(file_info.st_size);
+ temp_buf = malloc(file_info.st_size);
+
+ if ((ret = pread(fd, good_buf, file_info.st_size, 0)) < 0) {
+ simple_err("check_clone: pread", -errno);
+ exit(170);
+ }
+ if ((ret = rbd_read(cur_image, 0, file_info.st_size, temp_buf)) < 0) {
+ simple_err("check_clone: rbd_read", ret);
+ exit(171);
}
+ close(fd);
+ check_buffers(good_buf, temp_buf, 0, file_info.st_size);
+
+ unlink(filename);
+
+ free(good_buf);
+ free(temp_buf);
}
void
@@ -919,6 +925,25 @@ writefileimage()
}
}
+void
+do_flatten()
+{
+ int ret;
+
+ if (num_clones == 0 ||
+ (rbd_get_parent_info(image, NULL, 0, NULL, 0, NULL, 0)
+ == -ENOENT)) {
+ log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
+ return;
+ }
+ log4(OP_FLATTEN, 0, 0, 0);
+ prt("%lu flatten\n", testcalls);
+
+ if ((ret = rbd_flatten(image)) < 0) {
+ simple_err("do_flatten: rbd flatten", ret);
+ exit(177);
+ }
+}
void
docloseopen(void)
@@ -1048,6 +1073,10 @@ test(void)
do_clone();
break;
+ case OP_FLATTEN:
+ do_flatten();
+ break;
+
default:
prterr("test: unknown operation");
report_failure(42);
@@ -1279,7 +1308,6 @@ main(int argc, char **argv)
char *endp;
char goodfile[1024];
char logfile[1024];
- char finaliname[1024];
goodfile[0] = 0;
logfile[0] = 0;
@@ -1514,7 +1542,7 @@ main(int argc, char **argv)
maxfilelen);
exit(98);
}
- } else
+ } else
check_trunc_hack();
//test_fallocate();
@@ -1527,14 +1555,48 @@ main(int argc, char **argv)
report_failure(99);
}
- clone_imagename(finaliname, sizeof(finaliname), num_clones);
- if ((ret = rbd_remove(ioctx, finaliname)) < 0) {
- prterrcode("rbd_remove final image", ret);
- report_failure(100);
- }
+ if (num_clones > 0)
+ check_clone(num_clones - 1);
+
+ while (num_clones >= 0) {
+ static int first = 1;
+ char clonename[128];
+ char errmsg[128];
+ clone_imagename(clonename, 128, num_clones);
+ if ((ret = rbd_open(ioctx, clonename, &image, NULL)) < 0) {
+ sprintf(errmsg, "rbd_open %s", clonename);
+ prterrcode(errmsg, ret);
+ report_failure(101);
+ }
+ if (!first) {
+ if ((ret = rbd_snap_unprotect(image, "snap")) < 0) {
+ sprintf(errmsg, "rbd_snap_unprotect %s@snap",
+ clonename);
+ prterrcode(errmsg, ret);
+ report_failure(102);
+ }
+ if ((ret = rbd_snap_remove(image, "snap")) < 0) {
+ sprintf(errmsg, "rbd_snap_remove %s@snap",
+ clonename);
+ prterrcode(errmsg, ret);
+ report_failure(103);
+ }
+ }
+ if ((ret = rbd_close(image)) < 0) {
+ sprintf(errmsg, "rbd_close %s", clonename);
+ prterrcode(errmsg, ret);
+ report_failure(104);
+ }
- if (clone_calls)
- check_clones();
+ if ((ret = rbd_remove(ioctx, clonename)) < 0) {
+ sprintf(errmsg, "rbd_remove %s", clonename);
+ prterrcode(errmsg, ret);
+ report_failure(105);
+ }
+
+ first = 0;
+ num_clones--;
+ }
rados_ioctx_destroy(ioctx);
rados_shutdown(cluster);