-rw-r--r--  .gitignore | 1
-rwxr-xr-x  admin/build-doc | 1
-rw-r--r--  doc/appendix/differences-from-posix.rst | 17
-rw-r--r--  doc/appendix/index.rst | 10
-rw-r--r--  doc/architecture.rst | 341
-rw-r--r--  doc/conf.py | 1
-rw-r--r--  doc/dev/config.rst | 77
-rw-r--r--  doc/dev/context.rst | 20
-rw-r--r--  doc/dev/index.rst | 22
-rw-r--r--  doc/dev/libs.rst | 18
-rw-r--r--  doc/dev/logs.rst | 31
-rw-r--r--  doc/dev/object-store.rst | 67
-rw-r--r--  doc/index.rst | 2
-rw-r--r--  doc/ops/autobuilt.rst | 64
-rw-r--r--  doc/ops/filesystem.rst | 19
-rw-r--r--  doc/ops/index.rst | 2
-rw-r--r--  doc/ops/install.rst | 194
-rw-r--r--  doc/ops/misc.rst | 14
-rw-r--r--  doc/ops/mycluster.conf | 37
-rw-r--r--  keys/autobuild.asc | 41
-rw-r--r--  keys/release.asc | 140
-rw-r--r--  man/mkcephfs.8 | 2
-rw-r--r--  qa/workunits/false.sh | 3
-rw-r--r--  src/Makefile.am | 15
-rw-r--r--  src/client/Client.cc | 71
-rw-r--r--  src/client/Client.h | 2
-rw-r--r--  src/client/fuse_ll.cc | 4
-rw-r--r--  src/common/config.cc | 5
-rw-r--r--  src/common/config.h | 5
-rw-r--r--  src/cosd.cc | 22
-rw-r--r--  src/doc/object_store.dot | 61
-rw-r--r--  src/include/object.h | 16
-rw-r--r--  src/include/rbd/librbd.h | 5
-rw-r--r--  src/include/rbd/librbd.hpp | 12
-rw-r--r--  src/librbd.cc | 6
-rw-r--r--  src/mds/CInode.cc | 4
-rw-r--r--  src/mds/MDCache.cc | 2
-rw-r--r--  src/mds/flock.cc | 488
-rw-r--r--  src/mds/flock.h | 542
-rw-r--r--  src/mds/mdstypes.h | 1
-rw-r--r--  src/mkcephfs.in | 4
-rw-r--r--  src/mon/OSDMonitor.cc | 15
-rw-r--r--  src/os/CollectionIndex.h | 3
-rw-r--r--  src/os/FileStore.cc | 10
-rw-r--r--  src/os/FileStore.h | 2
-rw-r--r--  src/os/FlatIndex.h | 2
-rw-r--r--  src/os/HashIndex.h | 7
-rw-r--r--  src/os/IndexManager.cc | 13
-rw-r--r--  src/os/LFNIndex.cc | 165
-rw-r--r--  src/os/LFNIndex.h | 39
-rw-r--r--  src/osd/OSD.cc | 15
-rw-r--r--  src/osd/PG.cc | 1
-rw-r--r--  src/osd/PG.h | 4
-rw-r--r--  src/osd/ReplicatedPG.cc | 69
-rw-r--r--  src/osd/ReplicatedPG.h | 3
-rw-r--r--  src/osd/osd_types.cc | 7
-rw-r--r--  src/osdc/ObjectCacher.cc | 4
-rw-r--r--  src/rados.cc | 2
-rw-r--r--  src/rgw/rgw_access.h | 2
-rw-r--r--  src/rgw/rgw_admin.cc | 142
-rw-r--r--  src/rgw/rgw_bucket.cc | 85
-rw-r--r--  src/rgw/rgw_bucket.h | 1
-rw-r--r--  src/rgw/rgw_cache.h | 5
-rw-r--r--  src/rgw/rgw_common.cc | 5
-rw-r--r--  src/rgw/rgw_common.h | 19
-rw-r--r--  src/rgw/rgw_formats.h | 2
-rw-r--r--  src/rgw/rgw_log.cc | 12
-rw-r--r--  src/rgw/rgw_main.cc | 28
-rw-r--r--  src/rgw/rgw_op.cc | 57
-rw-r--r--  src/rgw/rgw_os_auth.h | 30
-rw-r--r--  src/rgw/rgw_rados.cc | 53
-rw-r--r--  src/rgw/rgw_rados.h | 21
-rw-r--r--  src/rgw/rgw_rest.cc | 63
-rw-r--r--  src/rgw/rgw_rest.h | 8
-rw-r--r--  src/rgw/rgw_rest_os.h | 116
-rw-r--r--  src/rgw/rgw_rest_s3.cc | 2
-rw-r--r--  src/rgw/rgw_rest_swift.cc (renamed from src/rgw/rgw_rest_os.cc) | 56
-rw-r--r--  src/rgw/rgw_rest_swift.h | 116
-rw-r--r--  src/rgw/rgw_swift.cc (renamed from src/rgw/rgw_os.cc) | 26
-rw-r--r--  src/rgw/rgw_swift.h (renamed from src/rgw/rgw_os.h) | 6
-rw-r--r--  src/rgw/rgw_swift_auth.cc (renamed from src/rgw/rgw_os_auth.cc) | 40
-rw-r--r--  src/rgw/rgw_swift_auth.h | 30
-rw-r--r--  src/rgw/rgw_user.cc | 119
-rw-r--r--  src/rgw/rgw_user.h | 10
-rw-r--r--  src/test/cli/radosgw_admin/help.t | 4
-rw-r--r--  src/test/store_test.cc | 4
86 files changed, 2543 insertions(+), 1269 deletions(-)
diff --git a/.gitignore b/.gitignore
index 99560459d1e..73a8d86bd14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,3 +53,4 @@ core
.project
.cproject
/build-doc
+/doc/object_store.png
diff --git a/admin/build-doc b/admin/build-doc
index aee1a48345c..622d7c84031 100755
--- a/admin/build-doc
+++ b/admin/build-doc
@@ -10,6 +10,7 @@ if [ ! -e build-doc/doxygen/xml ]; then
fi
dia --filter=png-libart --export=doc/overview.png.tmp doc/overview.dia
+
mv -- doc/overview.png.tmp doc/overview.png
cd build-doc
diff --git a/doc/appendix/differences-from-posix.rst b/doc/appendix/differences-from-posix.rst
new file mode 100644
index 00000000000..f327e786dc9
--- /dev/null
+++ b/doc/appendix/differences-from-posix.rst
@@ -0,0 +1,17 @@
+========================
+ Differences from POSIX
+========================
+
+.. todo:: delete http://ceph.newdream.net/wiki/Differences_from_POSIX
+
+Ceph does have a few places where it diverges from strict POSIX semantics for various reasons:
+
+- Sparse files propagate incorrectly to tools like df. They will only
+ use up the required space, but in df will increase the "used" space
+ by the full file size. We do this because actually keeping track of
+ the space a large, sparse file uses is very expensive.
+- In shared simultaneous writer situations, a write that crosses
+ object boundaries is not necessarily atomic. This means that you
+ could have writer A write "aa|aa" and writer B write "bb|bb"
+ simultaneously (where | is the object boundary), and end up with
+ "aa|bb" rather than the proper "aa|aa" or "bb|bb".
diff --git a/doc/appendix/index.rst b/doc/appendix/index.rst
new file mode 100644
index 00000000000..a98bf899a2a
--- /dev/null
+++ b/doc/appendix/index.rst
@@ -0,0 +1,10 @@
+============
+ Appendices
+============
+
+.. toctree::
+ :glob:
+ :numbered:
+ :titlesonly:
+
+ *
diff --git a/doc/architecture.rst b/doc/architecture.rst
index f66cf3cd9b7..05a3fdfa493 100644
--- a/doc/architecture.rst
+++ b/doc/architecture.rst
@@ -2,167 +2,180 @@
Architecture of Ceph
======================
-- Introduction to Ceph Project
-
- - High-level overview of project benefits for users (few paragraphs, mention each subproject)
- - Introduction to sub-projects (few paragraphs to a page each)
-
- - RADOS
- - RGW
- - RBD
- - Ceph
-
- - Example scenarios Ceph projects are/not suitable for
- - (Very) High-Level overview of Ceph
-
- This would include an introduction to basic project terminology,
- the concept of OSDs, MDSes, and Monitors, and things like
- that. What they do, some of why they're awesome, but not how they
- work.
-
-- Discussion of MDS terminology, daemon types (active, standby,
- standby-replay)
-
-.. todo:: write me
-
-=================================
- Library architecture
-=================================
-Ceph is structured into libraries which are built and then combined together to
-make executables and other libraries.
-
-- libcommon: a collection of utilities which are available to nearly every ceph
- library and executable. In general, libcommon should not contain global
- variables, because it is intended to be linked into libraries such as
- libceph.so.
-
-- libglobal: a collection of utilities focused on the needs of Ceph daemon
- programs. In here you will find pidfile management functions, signal
- handlers, and so forth.
-
-.. todo:: document other libraries
-
-=================================
- Configuration Management System
-=================================
-The configuration management system exists to provide every daemon with the
-proper configuration information. The configuration can be viewed as a set of
-key-value pairs.
-
-How can the configuration be set? Well, there are several sources:
- - the ceph configuration file, usually named ceph.conf
- - command line arguments::
- --debug-ms=1
- --debug-pg=10
- etc.
- - arguments injected at runtime by using injectargs
-
-======================================================
- The Configuration File
-======================================================
-Most configuration settings originate in the Ceph configuration file.
-
-How do we find the configuration file? Well, in order, we check:
- - the default locations
- - the environment variable CEPH_CONF
- - the command line argument -c
-
-Each stanza of the configuration file describes the key-value pairs that will be in
-effect for a particular subset of the daemons. The "global" stanza applies to
-everything. The "mon", "osd", and "mds" stanzas specify settings to take effect
-for all monitors, all osds, and all mds servers, respectively. A stanza of the
-form mon.$name, osd.$name, or mds.$name gives settings for the monitor, OSD, or
-MDS of that name, respectively. Configuration values that appear later in the
-file win over earlier ones.
-
-A sample configuration file can be found in src/sample.ceph.conf.
-
-======================================================
- Metavariables
-======================================================
-The configuration system supports certain "metavariables." If these occur
-inside a configuration value, they are expanded into something else-- similar to
-how bash shell expansion works.
-
-There are a few different metavariables:
- - $host: expands to the current hostname
- - $type: expands to one of "mds", "osd", or "mon"
- - $id: expands to the daemon identifier. For osd.0, this would be "0"; for mds.a, it would be "a"
- - $num: same as $id
- - $name: expands to $type.$id
-
-======================================================
- Interfacing with the Configuration Management System
-======================================================
-There are two ways for Ceph code to get configuration values. One way is to
-read it directly from a variable named "g_conf," or equivalently,
-"g_ceph_ctx->_conf." The other is to register an observer that will called
-every time the relevant configuration values changes. This observer will be
-called soon after the initial configuration is read, and every time after that
-when one of the relevant values changes. Each observer tracks a set of keys
-and is invoked only when one of the relevant keys changes.
-
-The interface to implement is found in common/config_obs.h.
-
-The observer method should be preferred in new code because
- - It is more flexible, allowing the code to do whatever reinitialization needs
- to be done to implement the new configuration value.
- - It is the only way to create a std::string configuration variable that can
- be changed by injectargs.
- - Even for int-valued configuration options, changing the values in one thread
- while another thread is reading them can lead to subtle and
- impossible-to-diagnose bugs.
-
-For these reasons, reading directly from g_conf should be considered deprecated
-and not done in new code. Do not ever alter g_conf.
-
-=================================
- Debug Logs
-=================================
-The main debugging tool for Ceph is the dout and derr logging functions.
-Collectively, these are referred to as "dout logging."
-
-Dout has several log faculties, which can be set at various log
-levels using the configuration management system. So it is possible to enable
-debugging just for the messenger, by setting debug_ms to 10, for example.
-
-Dout is implemented mainly in common/DoutStreambuf.cc
-
-The dout macro avoids even generating log messages which are not going to be
-used, by enclosing them in an "if" statement. What this means is that if you
-have the debug level set at 0, and you run this code
-
-``dout(20) << "myfoo() = " << myfoo() << dendl;``
-
-
-myfoo() will not be called here.
-
-Unfortunately, the performance of debug logging is relatively low. This is
-because there is a single, process-wide mutex which every debug output
-statement takes, and every debug output statement leads to a write() system
-call or a call to syslog(). There is also a computational overhead to using C++
-streams to consider. So you will need to be parsimonius in your logging to get
-the best performance.
-
-Sometimes, enabling logging can hide race conditions and other bugs by changing
-the timing of events. Keep this in mind when debugging.
-
-=================================
- CephContext
-=================================
-A CephContext represents a single view of the Ceph cluster. It comes complete
-with a configuration, a set of performance counters (PerfCounters), and a
-heartbeat map. You can find more information about CephContext in
-src/common/ceph_context.h.
-
-Generally, you will have only one CephContext in your application, called
-g_ceph_context. However, in library code, it is possible that the library user
-will initialize multiple CephContexts. For example, this would happen if he
-called rados_create more than once.
-
-A ceph context is required to issue log messages. Why is this? Well, without
-the CephContext, we would not know which log messages were disabled and which
-were enabled. The dout() macro implicitly references g_ceph_context, so it
-can't be used in library code. It is fine to use dout and derr in daemons, but
-in library code, you must use ldout and lderr, and pass in your own CephContext
-object. The compiler will enforce this restriction.
+Ceph is a distributed network storage and file system with distributed
+metadata management and POSIX semantics.
+
+RADOS is a reliable object store, used by Ceph, but also directly
+accessible.
+
+``radosgw`` is an S3-compatible RESTful HTTP service for object
+storage, using RADOS storage.
+
+RBD is a Linux kernel feature that exposes RADOS storage as a block
+device. Qemu/KVM also has a direct RBD client that avoids the kernel
+overhead.
+
+
+.. _monitor:
+
+Monitor cluster
+===============
+
+``cmon`` is a lightweight daemon that provides consensus for
+distributed decision-making in a Ceph/RADOS cluster.
+
+It is also the initial point of contact for new clients, and will hand
+out information about the topology of the cluster, such as the
+``osdmap``.
+
+You normally run 3 ``cmon`` daemons, on 3 separate physical machines,
+isolated from each other; for example, in different racks or rows.
+
+You could run just 1 instance, but that means giving up on high
+availability.
+
+You may use the same hosts for ``cmon`` and other purposes.
+
+``cmon`` processes talk to each other using a Paxos_\-style
+protocol. They discover each other via the ``[mon.X] mon addr`` fields
+in ``ceph.conf``.
+
+.. todo:: What about ``monmap``? Fact check.
+
+Any decision requires the majority of the ``cmon`` processes to be
+healthy and communicating with each other. For this reason, you never
+want an even number of ``cmon``\s; there is no unambiguous majority
+subgroup for an even number.
+
+.. _Paxos: http://en.wikipedia.org/wiki/Paxos_algorithm
+
+.. todo:: explain monmap
+
+
+.. _rados:
+
+
+RADOS
+=====
+
+``cosd`` is the storage daemon that provides the RADOS service. It
+uses ``cmon`` for cluster membership, services object read/write/etc
+requests from clients, and peers with other ``cosd``\s for data
+replication.
+
+The data model is fairly simple on this level. There are multiple
+named pools, and within each pool there are named objects, in a flat
+namespace (no directories). Each object has both data and metadata.
+
+The data for an object is a single, potentially big, series of
+bytes. Additionally, the series may be sparse: it may have holes that
+contain binary zeros and take up no actual storage.
+
+The metadata is an unordered set of key-value pairs. Its semantics
+are completely up to the client; for example, the Ceph filesystem uses
+metadata to store the file owner, etc.
+
+.. todo:: Verify that metadata is unordered.
+
+Underneath, ``cosd`` stores the data on a local filesystem. We
+recommend using Btrfs_, but any POSIX filesystem that has extended
+attributes should work (see :ref:`xattr`).
+
+.. _Btrfs: http://en.wikipedia.org/wiki/Btrfs
+
+.. todo:: write about access control
+
+.. todo:: explain osdmap
+
+.. todo:: explain plugins ("classes")
+
+
+.. _cephfs:
+
+Ceph filesystem
+===============
+
+The Ceph filesystem service is provided by a daemon called
+``cmds``. It uses RADOS to store all the filesystem metadata
+(directories, file ownership, access modes, etc), and directs clients
+to access RADOS directly for the file contents.
+
+The Ceph filesystem aims for POSIX compatibility, except for a few
+chosen differences. See :doc:`/appendix/differences-from-posix`.
+
+``cmds`` can run as a single process, or it can be distributed out to
+multiple physical machines, either for high availability or for
+scalability.
+
+For high availability, the extra ``cmds`` instances can be `standby`,
+ready to take over the duties of any failed ``cmds`` that was
+`active`. This is easy because all the data, including the journal, is
+stored on RADOS. The transition is triggered automatically by
+``cmon``.
+
+For scalability, multiple ``cmds`` instances can be `active`, and they
+will split the directory tree into subtrees (and shards of a single
+busy directory), effectively balancing the load amongst all `active`
+servers.
+
+Combinations of `standby` and `active` etc are possible, for example
+running 3 `active` ``cmds`` instances for scaling, and one `standby`.
+
+To control the number of `active` ``cmds``\es, see :doc:`/ops/grow/mds`.
+
+.. topic:: Status as of 2011-09:
+
+ Multiple `active` ``cmds`` operation is stable under normal
+ circumstances, but some failure scenarios may still cause
+ operational issues.
+
+.. todo:: document `standby-replay`
+
+.. todo:: mds.0 vs mds.alpha etc details
+
+
+
+``radosgw``
+===========
+
+``radosgw`` is a FastCGI service that provides a RESTful_ HTTP API to
+store objects and metadata. It layers on top of RADOS with its own
+data formats, and maintains its own user database, authentication,
+access control, and so on.
+
+.. _RESTful: http://en.wikipedia.org/wiki/RESTful
+
+
+Rados Block Device (RBD)
+========================
+
+In virtual machine scenarios, RBD is typically used via the ``rbd``
+network storage driver in Qemu/KVM, where the host machine uses
+``librbd`` to provide a block device service to the guest.
+
+Alternatively, as no direct ``librbd`` support is available in Xen,
+the Linux kernel can act as the RBD client and provide a real block
+device on the host machine, which can then be accessed by the
+virtualization layer. This is done with the command-line tool ``rbd`` (see
+:doc:`/ops/rbd`).
+
+The latter is also useful in non-virtualized scenarios.
+
+Internally, RBD stripes the device image over multiple RADOS objects,
+each typically located on a separate ``cosd``, allowing it to perform
+better than a single server could.
+
+
+Client
+======
+
+.. todo:: cephfs, cfuse, librados, libceph, librbd
+
+
+.. todo:: Summarize how much Ceph trusts the client, for what parts (security vs reliability).
+
+
+TODO
+====
+
+.. todo:: Example scenarios Ceph projects are/not suitable for
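
As a sketch of the RADOS data model described above, the ``rados``
command-line tool can be used to poke at pools and objects directly (the pool
and object names are placeholders; check ``rados --help`` for the exact flags
of your version)::

    rados mkpool mypool                          # create a named pool
    rados -p mypool put greeting /etc/hostname   # store a named object
    rados -p mypool ls                           # list objects in the flat namespace
    rados df                                     # per-pool usage summary
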
diff --git a/doc/conf.py b/doc/conf.py
index b74229608a1..afac33f816a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -22,6 +22,7 @@ html_sidebars = {
# ugly kludge until breathe is distutils-friendly
import sys; sys.path.append('../build-doc/breathe')
extensions = [
+ 'sphinx.ext.graphviz',
'sphinx.ext.todo',
'breathe',
]
diff --git a/doc/dev/config.rst b/doc/dev/config.rst
new file mode 100644
index 00000000000..59f1b92989c
--- /dev/null
+++ b/doc/dev/config.rst
@@ -0,0 +1,77 @@
+=================================
+ Configuration Management System
+=================================
+
+The configuration management system exists to provide every daemon with the
+proper configuration information. The configuration can be viewed as a set of
+key-value pairs.
+
+How can the configuration be set? Well, there are several sources:
+ - the ceph configuration file, usually named ceph.conf
+ - command line arguments::
+ --debug-ms=1
+ --debug-pg=10
+ etc.
+ - arguments injected at runtime by using injectargs
+
+
+The Configuration File
+======================
+
+Most configuration settings originate in the Ceph configuration file.
+
+How do we find the configuration file? Well, in order, we check:
+ - the default locations
+ - the environment variable CEPH_CONF
+ - the command line argument -c
+
+Each stanza of the configuration file describes the key-value pairs that will be in
+effect for a particular subset of the daemons. The "global" stanza applies to
+everything. The "mon", "osd", and "mds" stanzas specify settings to take effect
+for all monitors, all osds, and all mds servers, respectively. A stanza of the
+form mon.$name, osd.$name, or mds.$name gives settings for the monitor, OSD, or
+MDS of that name, respectively. Configuration values that appear later in the
+file win over earlier ones.
+
+A sample configuration file can be found in src/sample.ceph.conf.
+
+
+Metavariables
+=============
+
+The configuration system supports certain "metavariables." If these occur
+inside a configuration value, they are expanded into something else-- similar to
+how bash shell expansion works.
+
+There are a few different metavariables:
+ - $host: expands to the current hostname
+ - $type: expands to one of "mds", "osd", or "mon"
+ - $id: expands to the daemon identifier. For osd.0, this would be "0"; for mds.a, it would be "a"
+ - $num: same as $id
+ - $name: expands to $type.$id
+
+
+Interfacing with the Configuration Management System
+====================================================
+
+There are two ways for Ceph code to get configuration values. One way is to
+read it directly from a variable named "g_conf," or equivalently,
+"g_ceph_ctx->_conf." The other is to register an observer that will called
+every time the relevant configuration values changes. This observer will be
+called soon after the initial configuration is read, and every time after that
+when one of the relevant values changes. Each observer tracks a set of keys
+and is invoked only when one of the relevant keys changes.
+
+The interface to implement is found in common/config_obs.h.
+
+The observer method should be preferred in new code because
+ - It is more flexible, allowing the code to do whatever reinitialization needs
+ to be done to implement the new configuration value.
+ - It is the only way to create a std::string configuration variable that can
+ be changed by injectargs.
+ - Even for int-valued configuration options, changing the values in one thread
+ while another thread is reading them can lead to subtle and
+ impossible-to-diagnose bugs.
+
+For these reasons, reading directly from g_conf should be considered deprecated
+and not done in new code. Do not ever alter g_conf.
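
To make the observer approach concrete, here is a minimal sketch of a
configuration observer. It assumes the ``md_config_obs_t`` interface declared
in ``common/config_obs.h``; check that header for the exact signatures in your
tree::

    #include <set>
    #include <string>
    #include "common/config_obs.h"

    struct MessengerDebugObserver : public md_config_obs_t {
      // keys this observer wants to be notified about (NULL-terminated)
      const char **get_tracked_conf_keys() const {
        static const char *keys[] = { "debug_ms", NULL };
        return keys;
      }

      // called after the initial config is read, and on every later change
      void handle_conf_change(const md_config_t *conf,
                              const std::set<std::string> &changed) {
        if (changed.count("debug_ms")) {
          // reinitialize whatever depends on the new value here
        }
      }
    };

The observer is then registered with the configuration object, e.g. via
``g_conf->add_observer(&my_observer)`` (again, see the header for the exact
call in your version).
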
diff --git a/doc/dev/context.rst b/doc/dev/context.rst
new file mode 100644
index 00000000000..1a2b2cbfb00
--- /dev/null
+++ b/doc/dev/context.rst
@@ -0,0 +1,20 @@
+=============
+ CephContext
+=============
+
+A CephContext represents a single view of the Ceph cluster. It comes complete
+with a configuration, a set of performance counters (PerfCounters), and a
+heartbeat map. You can find more information about CephContext in
+src/common/ceph_context.h.
+
+Generally, you will have only one CephContext in your application, called
+g_ceph_context. However, in library code, it is possible that the library user
+will initialize multiple CephContexts. For example, this would happen if they
+called rados_create more than once.
+
+A CephContext is required to issue log messages. Why is this? Well, without
+the CephContext, we would not know which log messages were disabled and which
+were enabled. The dout() macro implicitly references g_ceph_context, so it
+can't be used in library code. It is fine to use dout and derr in daemons, but
+in library code, you must use ldout and lderr, and pass in your own CephContext
+object. The compiler will enforce this restriction.
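
A minimal sketch of the ``ldout``/``lderr`` pattern for library code, assuming
a ``CephContext *cct`` has been handed in by the caller (the function name is
just an example)::

    #include "common/ceph_context.h"
    #include "common/dout.h"

    void do_library_work(CephContext *cct)
    {
      ldout(cct, 10) << "starting work" << dendl;   // fine in library code
      lderr(cct) << "something went wrong" << dendl;
      // dout(10) << ... << dendl;   // daemon-only: implicitly uses g_ceph_context
    }
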
diff --git a/doc/dev/index.rst b/doc/dev/index.rst
new file mode 100644
index 00000000000..69fda91850c
--- /dev/null
+++ b/doc/dev/index.rst
@@ -0,0 +1,22 @@
+==================================
+ Internal developer documentation
+==================================
+
+.. note:: If you're looking for how to use Ceph as a library from your
+ own software, please see :doc:`/api/index`.
+
+You can start a development mode Ceph cluster, after compiling the source, with::
+
+ cd src
+ install -d -m0755 out dev/osd0
+ ./vstart.sh -n -x -l
+ # check that it's there
+ ./ceph health
+
+.. todo:: vstart is woefully undocumented and full of sharp sticks to poke yourself with.
+
+
+.. toctree::
+ :glob:
+
+ *
diff --git a/doc/dev/libs.rst b/doc/dev/libs.rst
new file mode 100644
index 00000000000..bdebdb3b000
--- /dev/null
+++ b/doc/dev/libs.rst
@@ -0,0 +1,18 @@
+======================
+ Library architecture
+======================
+
+Ceph is structured into libraries which are built and then combined together to
+make executables and other libraries.
+
+- libcommon: a collection of utilities which are available to nearly every ceph
+ library and executable. In general, libcommon should not contain global
+ variables, because it is intended to be linked into libraries such as
+ libceph.so.
+
+- libglobal: a collection of utilities focused on the needs of Ceph daemon
+ programs. In here you will find pidfile management functions, signal
+ handlers, and so forth.
+
+.. todo:: document other libraries
+
diff --git a/doc/dev/logs.rst b/doc/dev/logs.rst
new file mode 100644
index 00000000000..f4bef84c2ca
--- /dev/null
+++ b/doc/dev/logs.rst
@@ -0,0 +1,31 @@
+============
+ Debug Logs
+============
+
+The main debugging tool for Ceph is the dout and derr logging functions.
+Collectively, these are referred to as "dout logging."
+
+Dout has several log facilities, which can be set at various log
+levels using the configuration management system. So it is possible to enable
+debugging just for the messenger, by setting debug_ms to 10, for example.
+
+Dout is implemented mainly in common/DoutStreambuf.cc
+
+The dout macro avoids even generating log messages that are not going to be
+used by enclosing them in an "if" statement. This means that if you have
+the debug level set to 0 and you run this code
+
+``dout(20) << "myfoo() = " << myfoo() << dendl;``
+
+
+myfoo() will not be called here.
+
+Unfortunately, the performance of debug logging is relatively low. This is
+because there is a single, process-wide mutex which every debug output
+statement takes, and every debug output statement leads to a write() system
+call or a call to syslog(). There is also a computational overhead to using C++
+streams to consider. So you will need to be parsimonious in your logging to get
+the best performance.
+
+Sometimes, enabling logging can hide race conditions and other bugs by changing
+the timing of events. Keep this in mind when debugging.
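
Schematically, that laziness comes from an expansion along these lines (an
illustration only -- the real macro in ``common/dout.h`` is more involved)::

    #include <iostream>

    static int g_debug_level = 0;   // toy stand-in for the configured debug level

    // Illustration of the dout idea: the expression is only evaluated
    // when the requested level is enabled.
    #define MY_DOUT(lvl, expr)                  \
      do {                                      \
        if ((lvl) <= g_debug_level) {           \
          /* expr is only evaluated here */     \
          std::cout << expr << std::endl;       \
        }                                       \
      } while (0)

    // With g_debug_level == 0, myfoo() below is never called:
    //   MY_DOUT(20, "myfoo() = " << myfoo());
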
diff --git a/doc/dev/object-store.rst b/doc/dev/object-store.rst
new file mode 100644
index 00000000000..698070c5e7f
--- /dev/null
+++ b/doc/dev/object-store.rst
@@ -0,0 +1,67 @@
+====================================
+ Object Store Architecture Overview
+====================================
+
+.. graphviz::
+
+ /*
+ * Rough outline of object store module dependencies
+ */
+
+ digraph object_store {
+ size="7,7";
+ node [color=lightblue2, style=filled, fontname="Serif"];
+
+ "testrados" -> "librados"
+ "testradospp" -> "librados"
+
+ "rbd" -> "librados"
+
+ "radostool" -> "librados"
+
+ "radosgw_admin" -> "rgw"
+
+ "rgw" -> "librados"
+
+ "radosacl" -> "librados"
+
+ "librados" -> "objecter"
+
+ "ObjectCacher" -> "Filer"
+
+ "dumpjournal" -> "Journaler"
+
+ "Journaler" -> "Filer"
+
+ "SyntheticClient" -> "Filer"
+ "SyntheticClient" -> "objecter"
+
+ "Filer" -> "objecter"
+
+ "objecter" -> "OSDMap"
+
+ "cosd" -> "PG"
+ "cosd" -> "ObjectStore"
+
+ "crushtool" -> "CrushWrapper"
+
+ "OSDMap" -> "CrushWrapper"
+
+ "OSDMapTool" -> "OSDMap"
+
+ "PG" -> "ReplicatedPG"
+ "PG" -> "ObjectStore"
+ "PG" -> "OSDMap"
+
+ "ReplicatedPG" -> "ObjectStore"
+ "ReplicatedPG" -> "OSDMap"
+
+ "ObjectStore" -> "FileStore"
+
+ "FileStore" -> "ext3"
+ "FileStore" -> "ext4"
+ "FileStore" -> "btrfs"
+ }
+
+
+.. todo:: write more here
diff --git a/doc/index.rst b/doc/index.rst
index c108dffeeb4..d3ad0c669ec 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -90,9 +90,11 @@ Table of Contents
architecture
ops/index
api/index
+ Internals <dev/index>
man/index
papers
glossary
+ appendix/index
Indices and tables
diff --git a/doc/ops/autobuilt.rst b/doc/ops/autobuilt.rst
new file mode 100644
index 00000000000..add9b292a92
--- /dev/null
+++ b/doc/ops/autobuilt.rst
@@ -0,0 +1,64 @@
+=============================
+ Autobuilt unstable packages
+=============================
+
+We automatically build Debian and Ubuntu packages for any branches or
+tags that appear in the |ceph.git|_. We build packages for the `amd64`
+and `i386` architectures (`arch list`_), for the following
+distributions (`distro list`_):
+
+- ``natty`` (Ubuntu 11.04)
+- ``squeeze`` (Debian 6.0)
+
+.. |ceph.git| replace::
+ ``ceph.git`` repository
+.. _`ceph.git`: https://github.com/NewDreamNetwork/ceph
+
+.. _`arch list`: http://ceph.newdream.net/debian-snapshot-amd64/master/dists/natty/main/
+.. _`distro list`: http://ceph.newdream.net/debian-snapshot-amd64/master/dists/
+
+The current status of autobuilt packages can be found at
+http://ceph.newdream.net/gitbuilder-deb-amd64/ .
+
+If you wish to use these packages, you need to modify the
+:ref:`earlier instructions <install-debs>` as follows:
+
+.. warning:: The following commands make your computer trust any code
+ that makes it into ``ceph.git``, including work in progress
+ branches and versions of code with possible security issues (that
+ were fixed afterwards). Use at your own risk!
+
+Whenever we say *DISTRO* below, replace it with the codename of your
+operating system.
+
+Whenever we say *BRANCH* below, replace it with the version of the
+code you want to run, e.g. ``master``, ``stable`` or ``v0.34`` (`branch list`_ [#broken-links]_).
+
+.. _`branch list`: http://ceph.newdream.net/debian-snapshot-amd64/
+
+Run these commands on all nodes::
+
+ wget -q -O- https://raw.github.com/NewDreamNetwork/ceph/master/keys/autobuild.asc \
+ | sudo apt-key add -
+
+ sudo tee /etc/apt/sources.list.d/ceph.list <<EOF
+ deb http://ceph.newdream.net/debian-snapshot-amd64/BRANCH/ DISTRO main
+ deb-src http://ceph.newdream.net/debian-snapshot-amd64/BRANCH/ DISTRO main
+ EOF
+
+ sudo apt-get update
+ sudo apt-get install ceph
+
+From here on, you can follow the usual set up instructions in
+:doc:`/ops/install`.
+
+
+
+.. rubric:: Footnotes
+
+.. [#broken-links] Technical issues with how that part of the URL
+ space is HTTP reverse proxied means that the links in the generated
+ directory listings are broken. Please don't click on the links,
+ instead edit the URL bar manually, for now.
+
+ .. todo:: Fix the gitbuilder reverse proxy to not break relative URLs.
diff --git a/doc/ops/filesystem.rst b/doc/ops/filesystem.rst
index 6a9ddeca7a6..ad302dff69c 100644
--- a/doc/ops/filesystem.rst
+++ b/doc/ops/filesystem.rst
@@ -3,3 +3,22 @@
=======================================
.. todo:: Benefits of each, limits on non-btrfs ones, performance data when we have them, etc
+
+
+.. _btrfs:
+
+Btrfs
+-----
+
+.. todo:: what does btrfs give you (the journaling thing)
+
+
+ext4/ext3
+---------
+
+.. _xattr:
+
+Enabling extended attributes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. todo:: how to enable xattr on ext4/3
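
For reference, user extended attributes on ext3/ext4 are typically enabled
with the ``user_xattr`` mount option (a sketch; the device and mount point
are placeholders)::

    # remount an existing filesystem with user extended attributes enabled
    sudo mount -o remount,user_xattr /srv/osd.0

    # or make it persistent via /etc/fstab
    /dev/sdb1  /srv/osd.0  ext4  rw,noatime,user_xattr  0 0
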
diff --git a/doc/ops/index.rst b/doc/ops/index.rst
index 3221394d374..915a02dcfc5 100644
--- a/doc/ops/index.rst
+++ b/doc/ops/index.rst
@@ -14,3 +14,5 @@
monitor
grow/index
data-placement
+ autobuilt
+ misc
diff --git a/doc/ops/install.rst b/doc/ops/install.rst
index 692ac926bec..d0b589d56fc 100644
--- a/doc/ops/install.rst
+++ b/doc/ops/install.rst
@@ -2,13 +2,195 @@
Installing a Ceph cluster
===========================
+For development and really early stage testing, see :doc:`/dev/index`.
+
+For installing the latest development builds, see
+:doc:`/ops/autobuilt`.
+
+Installing any complex distributed software can be a lot of work. We
+support two automated ways of installing Ceph: using Chef_, or with
+the ``mkcephfs`` shell script.
+
+.. _Chef: http://wiki.opscode.com/display/chef
+
+.. topic:: Status as of 2011-09
+
+ This section hides a lot of the tedious underlying details. If you
+ need to, or wish to, roll your own deployment automation, or are
+ doing it manually, you'll have to dig into a lot more intricate
+ details. We are working on simplifying the installation, as that
+ also simplifies our Chef cookbooks.
+
+
+Installing Ceph using Chef
+==========================
+
+(Try saying that fast 10 times.)
+
+.. topic:: Status as of 2011-09
+
+ While we have Chef cookbooks in use internally, they are not yet
+ ready to handle unsupervised installation of a full cluster. Stay
+ tuned for updates.
+
.. todo:: write me
-Authentication is optional but very much recommended.
-Basically, everything somebody needs to go through to build a new
-cluster when not cheating via vstart or teuthology, but without
-mentioning all the design tradeoffs and options like journaling
-locations or filesystems
+Installing Ceph using ``mkcephfs``
+==================================
+
+Pick a host that has the Ceph software installed -- it does not have
+to be a part of your cluster, but it does need to have *matching
+versions* of the ``mkcephfs`` command and other Ceph tools
+installed. This will be your `admin host`.
+
+
+Installing the packages
+-----------------------
+
+
+.. _install-debs:
+
+Debian/Ubuntu
+~~~~~~~~~~~~~
+
+We regularly build Debian and Ubuntu packages for the `amd64` and
+`i386` architectures, for the following distributions:
+
+- ``sid`` (Debian unstable)
+- ``squeeze`` (Debian 6.0)
+- ``lenny`` (Debian 5.0)
+- ``oneiric`` (Ubuntu 11.10)
+- ``natty`` (Ubuntu 11.04)
+- ``maverick`` (Ubuntu 10.10)
+
+.. todo:: http://ceph.newdream.net/debian/dists/ also has ``lucid``
+ (Ubuntu 10.04), should that be removed?
+
+Whenever we say *DISTRO* below, replace that with the codename of your
+operating system.
+
+Run these commands on all nodes::
+
+ wget -q -O- https://raw.github.com/NewDreamNetwork/ceph/master/keys/release.asc \
+ | sudo apt-key add -
+
+ sudo tee /etc/apt/sources.list.d/ceph.list <<EOF
+ deb http://ceph.newdream.net/debian/ DISTRO main
+ deb-src http://ceph.newdream.net/debian/ DISTRO main
+ EOF
+
+ sudo apt-get update
+ sudo apt-get install ceph
+
+
+.. todo:: For older distributions, you may need to make sure your apt-get can read .bz2 compressed files. This works for Debian Lenny 5.0.3: ``apt-get install bzip2``
+
+.. todo:: Ponder packages; ceph.deb currently pulls in gceph (ceph.deb
+ Recommends: ceph-client-tools ceph-fuse libceph1 librados2 librbd1
+ btrfs-tools gceph) (other interesting: ceph-client-tools ceph-fuse
+ libceph-dev librados-dev librbd-dev obsync python-ceph radosgw)
+
+
+.. todo:: Other operating system support.
+
+
+Creating a ``ceph.conf`` file
+-----------------------------
+
+On the `admin host`, create a file with a name like
+``mycluster.conf``.
+
+Here's a template for a 3-node cluster, where all three machines run a
+:ref:`monitor <monitor>` and an :ref:`object store <rados>`, and the
+first one runs the :ref:`Ceph filesystem daemon <cephfs>`. Replace the
+hostnames and IP addresses with your own, and add/remove hosts as
+appropriate. All hostnames *must* be short form (no domain).
+
+.. literalinclude:: mycluster.conf
+ :language: ini
+
+Note how the ``host`` variables dictate which node runs which
+services. See :doc:`/ops/config` for more information.
+
+.. todo:: More specific link for host= convention.
+
+.. todo:: Point to cluster design docs, once they are ready.
+
+.. todo:: At this point, either use 1 or 3 mons, point to :doc:`grow/mon`
+
+
+Running ``mkcephfs``
+--------------------
+
+Verify that you can manage the nodes from the host you intend to run
+``mkcephfs`` on:
+
+- Make sure you can SSH_ from the `admin host` into all the nodes
+ using the short hostnames (``myserver`` not
+ ``myserver.mydept.example.com``), with no user specified
+ [#ssh_config]_.
+- Make sure you can run ``sudo`` without passphrase prompts on all
+ nodes [#sudo]_.
+
+.. _SSH: http://openssh.org/
+
+If you are not using :ref:`Btrfs <btrfs>`, enable :ref:`extended
+attributes <xattr>`.
+
+On each node, make sure the directory ``/srv/osd.N`` (with the
+appropriate ``N``) exists, and the right filesystem is mounted. If you
+are not using a separate filesystem for the file store, just run
+``sudo mkdir /srv/osd.N`` (with the right ``N``).
+
+Then, using the right path to the ``mycluster.conf`` file you prepared
+earlier, run::
+
+ mkcephfs -a -c mycluster.conf -k mycluster.keyring
+
+This will place an `admin key` into ``mycluster.keyring``, which will
+be used to manage the cluster. Treat it like a ``root`` password to
+your filesystem.
+
+.. todo:: Link to explanation of `admin key`.
+
+That should SSH into all the nodes, and set up Ceph for you.
+
+It does **not** copy the configuration, or start the services. Let's
+do that::
+
+ ssh myserver01 sudo tee /etc/ceph/ceph.conf <mycluster.conf
+ ssh myserver02 sudo tee /etc/ceph/ceph.conf <mycluster.conf
+ ssh myserver03 sudo tee /etc/ceph/ceph.conf <mycluster.conf
+ ...
+
+ ssh myserver01 sudo /etc/init.d/ceph start
+ ssh myserver02 sudo /etc/init.d/ceph start
+ ssh myserver03 sudo /etc/init.d/ceph start
+ ...
+
+After a little while, the cluster should come up and reach a healthy
+state. We can check that::
+
+ ceph -k mycluster.keyring -c mycluster.conf health
+ 2011-09-06 12:33:51.561012 mon <- [health]
+ 2011-09-06 12:33:51.562164 mon2 -> 'HEALTH_OK' (0)
+
+.. todo:: Document "healthy"
+
+.. todo:: Improve output.
+
+
+
+.. rubric:: Footnotes
+
+.. [#ssh_config] Something like this in your ``~/.ssh/config`` may
+ help -- unfortunately you need an entry per node::
+
+ Host myserverNN
+ Hostname myserverNN.dept.example.com
+ User ubuntu
+
+.. [#sudo] The relevant ``sudoers`` syntax looks like this::
-At this point, either use 1 or 3 mons, point to :doc:`grow/mon`
+ %admin ALL=(ALL) NOPASSWD:ALL
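
A quick way to verify the SSH and sudo prerequisites above from the `admin
host` (a sketch; substitute your own hostnames)::

    for host in myserver01 myserver02 myserver03; do
        ssh $host sudo true && echo "$host: ssh + passwordless sudo ok"
    done
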
diff --git a/doc/ops/misc.rst b/doc/ops/misc.rst
new file mode 100644
index 00000000000..084debc80e7
--- /dev/null
+++ b/doc/ops/misc.rst
@@ -0,0 +1,14 @@
+===============
+ Miscellaneous
+===============
+
+.. todo:: This section should not exist. Try to reorganize, when
+ document is otherwise more ready.
+
+
+Disabling encryption
+====================
+
+Authentication is optional but very much recommended.
+
+.. todo:: write me
diff --git a/doc/ops/mycluster.conf b/doc/ops/mycluster.conf
new file mode 100644
index 00000000000..454eca63bfb
--- /dev/null
+++ b/doc/ops/mycluster.conf
@@ -0,0 +1,37 @@
+[global]
+ auth supported = cephx
+ keyring = /etc/ceph/$name.keyring
+
+[mon]
+ mon data = /srv/mon.$id
+
+[mds]
+
+[osd]
+ osd data = /srv/osd.$id
+ osd journal = /srv/osd.$id.journal
+ osd journal size = 1000
+
+[mon.a]
+ host = myserver01
+ mon addr = 10.0.0.101:6789
+
+[mon.b]
+ host = myserver02
+ mon addr = 10.0.0.102:6789
+
+[mon.c]
+ host = myserver03
+ mon addr = 10.0.0.103:6789
+
+[osd.0]
+ host = myserver01
+
+[osd.1]
+ host = myserver02
+
+[osd.2]
+ host = myserver03
+
+[mds.a]
+ host = myserver01
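
Given the metavariables described earlier (``$name``, ``$id``), the ``osd.0``
section above effectively resolves to something like the following (shown
fully expanded for illustration only; you would not normally write it this
way)::

    [osd.0]
        host = myserver01
        keyring = /etc/ceph/osd.0.keyring
        osd data = /srv/osd.0
        osd journal = /srv/osd.0.journal
        osd journal size = 1000
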
diff --git a/keys/autobuild.asc b/keys/autobuild.asc
new file mode 100644
index 00000000000..2a1d17dc9ef
--- /dev/null
+++ b/keys/autobuild.asc
@@ -0,0 +1,41 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.9 (GNU/Linux)
+
+mQGiBE1Rr28RBADCxdpLV3ea9ocpS/1+UCvHqD5xjmlw/9dmji4qrUX0+IhPMNuA
+GBBt2CRaR7ygMF5S0NFXooegph0/+NT0KisLIuhUI3gde4SWb5jsb8hpGUse9MC5
+DN39P46zZSpepIMlQuQUkge8W/H2qBu10RcwQhs7o2fZ1zK9F3MmRCkBqwCggpap
+GsOgE2IlWjcztmE6xcPO0wED/R4BxTaQM+jxIjylnHgn9PYy6795yIc/ZoYjNnIh
+QyjqbLWnyzeTmjPBwcXNljKqzEoA/Cjb2gClxHXrYAw7bGu7wKbnqhzdghSx7ab+
+HwIoy/v6IQqv+EXZgYHonqQwqtgfAHp5ON2gWu03cHoGkXfmA4qZIoowqMolZhGo
+cF30A/9GotDdnMlqh8bFBOCMuxfRow7H8RpfL0fX7VHA0knAZEDk2rNFeebL5QKH
+GNJm9Wa6JSVj1NUIaz4LHyravqXi4MXzlUqauhLHw1iG+qwZlPM04z+1Dj6A+2Hr
+b5UxI/I+EzmO5OYa38YWOqybNVBH0wO+sMCpdBq0LABa8X29LbRPQ2VwaCBhdXRv
+bWF0ZWQgcGFja2FnZSBidWlsZCAoQ2VwaCBhdXRvbWF0ZWQgcGFja2FnZSBidWls
+ZCkgPHNhZ2VAbmV3ZHJlYW0ubmV0PohmBBMRAgAmBQJNUa9vAhsDBQkDwmcABgsJ
+CAcDAgQVAggDBBYCAwECHgECF4AACgkQbq6uIgPDlRpR0QCfZnYE8vEDX4JL3sZj
+5LvMsXruULIAnjHBAYvdlu5iMowoEMQDJlNNdscxuQQNBE1Rr28QEACKG04kxGY1
+cwGoInHVP6z1+8oqGiaiYWFflYRtSiwoUVtl30T1sMOSzoEvmauc+rmBBfsyaBb8
+DLDUIgGKv1FCOY/tfqnOyQXotPjgaLeCtK5A5Z5D212wbskf5fRHAxiychwKURiE
+eesRa7EWrF6ohFxOTy9NOlFi7ctusShw6Q2kUtN7bQCX9hJdYs7PYQXvCXvW8DNt
+7IitF7MpgMHNcj0wik6p38I4s7pqK6mqP4AXVVSWbJKr/LSz8bI8KhWRAT7erVAZ
+f6FElR2xZVr3c4zsE2HFpnZTsM5y/nj8fUkgKGl8OfBuUoh+MCVfnPmE6sgWfDTK
+kwWtUcmL6V9UQ1INUJ3sk+XBY9SMNbOn04su9FjQyNEMI/3VK7yuyKBRAN7IIVgP
+2ch499m6+YFV9ZkG3JSTovNiqSpQouW7YPkS+8mxlPo03LQcU5bHeacBl0T8Xjlv
+qu6q279EliHul4huKL0+myPN4DtmOTh/kwgSy3BGCBdS+wfAJSZcuKI7pk7pHGCd
+UjNMHQZmPFbwzp33bVLd16gnAx0OW5DOn6l0VfgIQNSJ2rn7WZ5jdyg/Flp2VlWV
+tAHFLzkCa+LvQ5twSuzrV/VipSr3xz3pTDLY+ZxDztvrgA6AST8+sdq6uQTYjwUQ
+V0wzanvp9hkC5eqRY6YlzcgMkWFv8DCIEwADBQ//ZQaeVmG6T5vyfXf2JrCipmI4
+MAdO+ezEtWE82wgixlCvvm26UmUejCYgtD6DmwY/7/bIjvJDhUwP0+hAHHOpR62g
+ncoMtbMryHpm3FvYH58JNk5gx8ZA322WEc2GCRCQzrMQoMKBcpZY/703GpQ4l3RZ
+7/25gq7ANohV5zeddFQftc05PMBBJLU3U+lrnahJS1WaOXNQzS6oVj9jNda1jkgc
+Qni6QssSIMT6rAPsVbGJhe9mxr2VWdQ90QlubpszIeSJuqqJxLwqH8XHXZmQOYxm
+yVP9a3pFqWDmsNxDA8ttYnMIc+nUAgCDJ84ScwQ1GvoCUD1b1cFNzvvhEHsNb4D/
+XbdrFcFGwEkeyivUsojdq2YnGjYSgauqyNWbeEgBrWzUe5USYysmziL/KAubcUjI
+beRGxyPS6iQ2kbvfEJJPgocWTfLs5j61FObO+MVlj+PEmxWbcsIRv/pnG2V2FPJ8
+evhzgvp7cG9imZPM6dWHzc/ZFdi3Bcs51RtStsvPqXv4icKIi+01h1MLHNBqwuUk
+IiiK7ooMlvnp+DiEsVSuYYKBdGTi+4+nduuYL2g8CTNJKZuC46dY7EcE3lRYZlxl
+7dwN3jfLPRlnNscs34dwhZa+b70Flia0U1DNF4jrIFFBSHD3TqMg0Z6kxp1Tfxpe
+GOLOqnBWrr0GKehu9CGITwQYEQIADwUCTVGvbwIbDAUJA8JnAAAKCRBurq4iA8OV
+GqKjAJ9QA7mNQs0Rko5VGYA+xjPokf0yVACfQMEFVHxT/k9+awAbBFLR3D0jjJ4=
+=PYuQ
+-----END PGP PUBLIC KEY BLOCK-----
diff --git a/keys/release.asc b/keys/release.asc
new file mode 100644
index 00000000000..a4e8441c01d
--- /dev/null
+++ b/keys/release.asc
@@ -0,0 +1,140 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.9 (GNU/Linux)
+
+mQGiBEtYu+8RBAC3y30BJKKV67fz6UhB0cQm1mdfYl1gBMPQv5FuX9sai7ZuzAWh
+83uGXByCp6foBp33xbusVqtM9YBrswBUfR0K5SJiWrp07e/vUz6MIORakPhQ4ggK
+M+pAvg+myQa9FdiAdFN1Pm7QXzziLSJ6hkNtmytkSxNQ1p/srx4PT2WHGwCg6bTk
+i7mUeTw+l3MQtudEAXRwb2sD/isdW5KdWtok9UwncQlfsdv8C/coHeGLHBxvIEdh
+BGHUD6DnWEIb4XbCaIBXpHu0aztm9PLE160YN8dYRMqnuwNmU7RWp0b97g641xi1
+YW/+ShBVrfUjHlYjO7vZpIJlO6rnTQB6bLuKrZ0ZG5KdMnQjbURnCzMtFPMdp0IV
+P9EgA/0YAxL1AM4aTeSYLfyDSRHe5Nvv8Y0wPV8cHKQXacgP5042riyjCxY9vSkQ
+yiLqKvC+ZBr93vRnLdS4UnDiOlEW1vqhbT8pRhAM7n8ngGRLWbrG7nhRo27eJVPr
+0r+srwCVR+udfBS1RTnzHtXMDrpLY9GS59r9x/UquwHHsfyU1bQdU2FnZSBXZWls
+IDxzYWdlQG5ld2RyZWFtLm5ldD6IYQQTEQgAIQIbAwIeAQIXgAUCS1i+bgULCQgH
+AwUVCgkICwUWAgMBAAAKCRDaRCDtKImVyHpGAJ9ThO25++a5RK/HloILHOZfC2nc
+BgCfVpv7PqHlvJcNLGETMjK9OjdzK12IRgQQEQgABgUCS1i/yQAKCRDOlFtnvv7s
+ZKPLAKDL0hF9FEGmRCC4E7K2Og1E7ju76ACfSOFFdsso0wsvmLIzPeOuwixZBEaI
+YQQTEQgAIQUCS1i77wIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAKCRDaRCDt
+KImVyAJcAKCPXc3aC7PAHuG+tJGc+ozvobkpJgCg22rH3JRr9mtYdX6RzIJCde24
+jliIRgQQEQIABgUCS4NJBAAKCRD0mo+yK5g/WDl+AKCopuwEut14Uz7FksJ/LUVT
+rveKWACfbUCPrKIo7hpHHXm0w2eBsTRs/TOJAhwEEAECAAYFAkuDSRAACgkQk0OA
+6Dag0KrGAQ/+IHS8fUApyCuomo318MPpHXQ1wJlnysMgmtvTJDTpsWM51iCYB3fG
+Op2WHlddkRJtZkrK0Yxf6koK+HaBHj4vlQgDkGwdd9N1xqQz8N0ztrwtDOwlffdx
+TVzoqaAymSvb37Eptcet0Ru+aQB6hmyFkj02SNfJ4ncPTFgabDKpXMtsZAcbBw5I
+8UHXYw25XruE4M9yF4UGilySZ9wPHm5Na/F3XM/gAJS9O6+3uiB5TbXlPv1jnFbJ
++3bSb+2zz97TrqE2zvfvFd5kiUXbyYkHNvt/HJi8MQFPEeBmq6Le+dQ41jtpa0Zs
+dqm6EoJATeVRuzrTOk+7+FVlPZB5ZKQWErOavWln6VDaBogE0IH2KIdxuaC7LWLG
+mBcNQZGvq8Qosx60jLGYCtzdftr7pZzYl5QRsqPL7pAiiKds15HJzu4beSewPe9u
+QZ1MFQ249rHn170TNDE2LsUQp4JeO5E00MExyxP6EYB3Vo6mFWoSf377GrIHbVkd
+5ePPmUN3KAsK7omUwTKQpsbtcoz8oyMOgSFIkYA/SyI18riRKviL2eJQ2jI3a0M7
+G3SB2lVueyF/gf7E/+jNjeWMImd7glQ8s7v+Mbiuq7lszLljwgPsahCiVgvDS1pN
+PDx8E2/VDE437RZpCmLae/JTwL61BsY9F4bB0Q4z9sEQ7IFcx1xHdfiIRgQQEQIA
+BgUCS4NNKQAKCRANjRNR/daw26C8AKC6+goG/2EJ31oIg7nL/Qf1BwbHiwCfZbXY
+AqCRBDWQmky8Upqc48WCgv2JAhwEEwECAAYFAkuDgOUACgkQqQNBJhIwH6LhuhAA
+lFgek4UpgvuM5+3wysojdaD207WSadLAUU2uv+lSSGFaOFtEk0u41LbTSUjiFwkJ
+22s7qCb4eCWPO8VKpmjlp9SWyxDn3OMRR34idrle8bEPeIJBiwkUBWO3QpRfkrbL
+IuK5g9hHMKtUlKLXiFxc63RXNdekaygDlJuuvjLiN6IYvvDJK9S1Iy+PlodPjckR
+J3GtxOmD2k0TFqXrfEO6qdzNVM6XQJ01irBrl1r8PYdMdk+sRjJy6GXO0KBjFNl7
+JZw71hEyvVEzquQqy5F6xvfr1Um5Y7UHpjra9rA33qn7T2IHSAAV9RF3j1y4Bmsc
+o5BnMi1TmQa9zOC5oAszDPFxVfbxpHeX7v7QAiUdaCf9uw49fyOd8k0PO7Notasn
+DnZPMwqINBHaTXlIOtVYiXV3FUlARxpLrqZ7JwdcIXiqRCqRqv7NlDoob1vBE5Ew
+mLnAAdH6Lj/QIMkeEJzhR6k2KRPXq4SB/ggUmuzw/jAWdAyUr3kOXkPAwOKLMLh0
+FB3DK5QkbWkYGGm4zYBmVBSpcUt5dLUqX9O0qhKA1F287y8eF/E7ArCaoToVNAZx
+6swnNdQNgOqKASRikLANJB0Qof1iSVh6dMbJUR7D8iHuowcSPDP/1JGNn7VFDeo/
+QsQOaAtL+nhJlFpqIcwaTTXO7KTN0sHGAxlsZQtkppKIRgQTEQIABgUCS4S7NwAK
+CRAjcDdYmlP8Ku40AJwL1Hh/05el/VYQe/JgDB17UJVawACfX9vGPaJIvj5lpYic
+OGn4VnNlQJ2IRgQTEQIABgUCS4OP4AAKCRAR6PE9C66WSJotAJ9cKKaTbaHjcjvM
+YOnAhhuXcqD9KwCeMwxZTlfAVMEGyWcq5NOvnqAfzLqIRgQTEQIABgUCS4SxkwAK
+CRAhGTfVLP6ynenjAJ0XOHfVA1uAulVfHNtR4F8NGb6+pwCdEuC9RiBjNSV+vdCQ
+lFsFganlnNmIRgQTEQIABgUCS4TAdQAKCRD/XLkM/+lAh7PHAJ0caUFxh9JVKxNU
+pSXT3usvz/49wgCfYHTNkZYaTyML+zSZ3kpf2/UiPPGJARwEEwECAAYFAkuEy90A
+CgkQtlATpJN1Xgh0NAf/drKpNHibzd43ipM1KGA1CJI9QCU+pH8P37hf7KbpDBMz
+XKzsYzijuqUtlt+bokKpHj17OUpdGhvk3/gdHTBqjoj+2S4SZOrfhbbczCP9YgxX
+fHtPWVpxQDliND3B+N2LnwrOGBVtfL2QlHY5pfTevzOZnvo2ivNQ/5NqVjWc1zxe
+2yIhDFNaTfgr+GP8wO3G8vzY+kdPVCZA1SdBRCvMzJakFICpwXex/yi1CR89DtMV
+5eqD5pgtbW3z8J75hF+x/FOs87EIwtUUBqq5XKE81VDPF5d1gO/AQ5Aw1JYvFgL/
+dFl7BAc4yMB6n61Hnz7ykTtUuzcYDPNF8tNxjJ5BBIkBHAQQAQIABgUCS4YYvAAK
+CRCGiaJaiNv9F+PwCAC/1mCxwgnQ/rqEBCCIZ7YnIGDdoyLB+samhYNGbYRMX9HS
+8i6/bX/G6U9pbsRhHXdLPUrU5qpFfjec7RJg/+QJReHmCKbG+Ed4SvZDTlR/R/VB
+vOEyitV1+jnPyPJYNjvRC7E6IPL8n8iRPsGf5Pj/mAwPQIugu23TM/zcUGaWpkJe
+V7hElpXQsJx9pzvi/MC51L/wE0N+TClZGMoIIuCJrfkWrgZ/P4rtdlnH8fdTkEgj
+P0KGMZ1mevk4LLrzUqR9+TAmEzOnSNK2BmV49Dz7cGNyUBJrT7uTdor3KVUNHSjH
+C3kLm1gHxiHXrHeSUU/lwG1OQvijAGTqzM7qI3KfiEYEExECAAYFAkuGaakACgkQ
+O/Dos2Fb9DSB8wCfWwOU744GNjWzho7/dJbetsZFq74AnjFJcAOm6AhnS56UdJMx
+QCJBPK5WiEYEExECAAYFAkuHPcwACgkQ9e/J5tUGicVt2ACfUkYQe1ge6/Auq8nE
+KCkig2SCZJoAoJtdSPKPDLb5oyhnyl9VzRnAnSmmiEYEExECAAYFAkuHskcACgkQ
+LBV88STLCDnYvQCfRB7Bfd54qXnhoGKE/DCZ5pqNEj0An34Xc/H1gri/J74ajomK
+b1ZgI/dgtBtTYWdlIFdlaWwgPHNhZ2VAa2VybmVsLm9yZz6IYQQTEQgAIQUCS1jH
+/gIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAKCRDaRCDtKImVyIX/AJwNXg/7
+uBgx6wnd0cfZtYuxFKNwmwCfQBEAL6Ml9lams5KOTNlk6Rp1kKeIRgQQEQIABgUC
+S4NJBAAKCRD0mo+yK5g/WCZgAKDCGALmU3MINVnZ7xWhgeX2u447rACeKZ2gkcYf
+Ctv/6BhsIs+mQMDVQl6JAhwEEAECAAYFAkuDSRAACgkQk0OA6Dag0KpWGg/6A7f1
+yveGxH1vOdJHWFEW6OoMSpAEEhmV61Xl/ExPyqn+0Z8nHa9mKrkv8HP53Grfk7Vz
+QjoZPYWprIJsljuFSig+Dr5UaW8gyiFtzbICZXRmGI4yR1ZI0gGbWfx59Ypavd4U
+gWym7SCFOzSvwh9VyymB0DANmEp6MHH7KMnDOnbxu9TbDg2ShWVyR+DKNg9KZUM/
+AsT+vNJubKwo7wd9v6cJxVnZ1RO3Iyy3119Q0N0FP28YoijaWrIpguoBPgAeqkLn
+KgIVme+QP7cMQFxBqBwtSu7Jo5Jjo33CvmfXftUCV/JQCkbcbPR5thhh6wt/4VfM
+/VvQO3N03joR+brNEshS9YqdrVSl1IYdUJ9HR098Nkgjx7ZZrSYuCF3CeaxXsFgv
+xKPmZ34ZalnvvJsp2SE2vYS0NqH56fQ+pxCacO5u/OUXBvzFn/Ih/C7hGdPwUWnz
+t4lRo44Hb2uXGInSVIDOgxszvnToOy55yZe/sAUYE/HIjmjvGga3Qx4UH6kT0k/Q
+3YncOC5Y2eckPefwNvvtcv8oet2/d1zX81/R1CMCJJnuVDvI570gFGZd2/yHP8uH
+vP4aBFuQfoCeKsWx13+WQ4FCinwK1clnVeMxAY9CufO6OdSPy2rg8PECBaM8S3zV
+0MLO2kozQU7GCmTTZ3r7GcXCR2xeayFahHC5ZwGIRgQQEQIABgUCS4NNIAAKCRAN
+jRNR/daw232FAKCcVRTO27X2OIm+UFiIDfuo32ACRwCgvS0gMza5jH4H0vikeyWp
+c7Nn566JAhwEEwECAAYFAkuDgOAACgkQqQNBJhIwH6L9Mg/8DJ2F/CjNWqiI5DAC
+aoKEF4xszO/gqd15hnGe/OEgszMU5etm9EAKAW4a+2i2XgzdmxvRKudJS69Nze1D
+y1yzOB6688raRTNc3bd33Sd/vtAht1hwjI1UitxXr/71jxU+lEiXizFyU4gF1Omc
+LuyOj8Ap2m4eR6GlLu/MCTTLoBYGPGpiMQQ4kA6A1wRYQkemBnYozn2wUw12EuUs
+znaLClexhG2h/V6JPXdn/lTfeMQdLG/+qzVdN64ZvuLAGZHcmMjrdsrcSSvVLfYP
+5Hn43NO0W+mntQGyCgvllYUeVGhXHGnwRZnPiIm+vtkRtHmyv/T6ru4XUknGr7n8
+6LF4CEN2hCwMrFeb5cdYCPEnEgpON9f0KU+E96wxHVH844iXg5fDa98P2MpDEEQB
+Psvh11VzPpUQwdrPxfus+uf26PhOAxsyNAOt4VMtYtT6LxADf4tQmkh3OkmSe/1Q
+0oAm2M96I1Pf8iDdSG1xRISxd7zPbQZefNXwqMTJEt7YdqVJPTtAejmIcRFV5ypn
+XLvteeBYFZO5YWGOOeLedYOVRB7KVitYmWQRTQk8zZJyvKuF+/nmuseuczT31hlu
+JxJAjjbZ0PxSSkrNuwqiLqj8bMMKkXC2423g4SEJF6VzNKwUtKuGlKNLxCyOfNFQ
+IX4AlD/HgaWHd1giZFJMhwm94aGIRgQTEQIABgUCS4OP3QAKCRAR6PE9C66WSGDp
+AJ0dHSfxYPlUBB3f2b/iIAK4kbxmnwCbB7wXSFR1Rse5AnXOHa3adu3lnZaJARwE
+EAECAAYFAkuGGLwACgkQhomiWojb/ReoCAgA4SHL0WIarFR0ssaXh0lNO5DEY9xr
+qio/H0RN32BtkLSoYTxfQqwRIWTacfjOEX6NUq5oypE/vEnbFhDOPHqf+PBKp97D
+wNjS/07KyhrCA8LDbCQRHpmRNDimPwqFdOahXEctoYKFu+GCqfvism1AIHv26ZKA
+hSHjkMTfmDJeQbnvWgSRHU5bM2q+eo3Mt42fRbs+Z+QPYsSs0vDxEmcEkwP26oLT
+4UAueDVHlbyKEv3CwqUJu2uXYAmYOXD3AlaCaPfD0833Wk/wcTdwK9sj9P0O8/lv
+NQ8TSFkmbk9jkkKerxm2IbO5iz9STWDjauO2UvNWXeS5OV+fvAfargsyPrkEDQRL
+WLvvEBAAp215RD7U20GFb0VqZ1EmBJJlCFJALyxmPpxAjtabxZ5jjIwSSbcVSPt5
+mi3IvLpT56X5jMkUQXj+g5NcPFcquNJn4x8p3rTreiS6x3M/vuWYbZPMeCtACfQI
+bRDiix7eFX5aY1C2AwQAludAYPMmuiWS6HC8OttSJSrusJeHbSlVPsK46xX3c706
+HHJLXQuDP/in496KOZMVmWH8xhqAohdHrJyMNO4rj71R7pigHZT5YryuJu4fW95K
+9LrMkWG1J49CcLKiz4jiDfVSEZDBvDZZ9XkHqFbIKBVSWJnJr/k76myalTMpZGTq
+3RfdZ/soIeNU+yGscCGKKZvh2845oy6LJNotRMI0oy40bcfjzA7BTTipeZ9ChPRs
+H8RoQH1eoPREwLOqtk77gXKDgQgpWqQeRDt5+vF8lapwcnIpt95LKAM2epp7FZZ0
+Tv7lywU2SeFTSi5Gi7jYtWTtXzwCepSl+VXZ5ai3ZuQiCV0b38Ijds56SKkfw/ZU
+y8cZzs55SOAdamBTB0hB+6IVAKW0tVSTVCEBYLr+xu+Cfpj8tWEsFVs5e7t3EwBS
+sUF3WTLD6rjPNtSwRUhwxRkh41kKzQhUBteyK+ruk0Z5mNBxOphKqIGeMB5ry32S
+i/nnQiKNHaqX7t0RrTOlbxf8n0XvfUoEr4quRGrU1TzAGdoetF8ABR0P/j5FU5Iu
+rlzhJS9SiIu6pYqkBfk+fKvtEFGXYQfIsJQSiCmj6YHQs1WVvnqc8TeiwwvH0J8D
+fwDplkhUJ0VNz5KtehN8/uRhmJSpMc0Up0x/VajN2q7k5BNal0GC1/rOrYFTsdyi
+YvGl50D4dG5M7VmrYmaXTKZ1HeC0vTqcH12a0lGEB9IYjma1Y6MWxUr79wWPS361
+lsxuwdXpKoHKQfT8LUYrXMLmIuuCtZvqPHJ0HblIp9UBNwTfqL/8g3A3KyEdT6iM
+lF3cUPsigqISPVLFocQ3Q2LnypZkE3XoqM/JsK4sFfFwg4XDaIsORt+YG66vsZEt
+STwM44wxyZm5pgjloKsdY+UwP1w/OBPhhNZq/QPCsL+pKbdbt3Nx9CO84GiyoxhQ
+zVLKGmTFCvilyW3nauojV4WM+Evu3dVS56tq+oNk9ttOH+5UsLcvMiaA/hS3q/O2
+jf7wS8dHOdTn1elHqAJPUbe/HlReTrAU1ll4rujKWi8ZQabd4RoOTjfIPxAOHszO
+pmFwHrpuL/7jH8TGcNK/6GbVeOVFlFU40klJIJ5O5He91IqSFyZoTMqFqloeGxuC
+JJV+uVNEPEyM5LCLdag8BvCnfhZZbEiUd2A5th2Pc+yKwxZ3s/Np7HfYkkug5JZ5
+75g6DNinZfPFGga5g6DWgkdPyoxrS4zhU4qoiEkEGBEIAAkFAktYu+8CGwwACgkQ
+2kQg7SiJlchkaACglP00AlVap07y7/0Ul7XNS+C/seUAnRp6f33KAyusDGes/tMH
+xG6RQNuruQINBEtYvpkBEACvQ/WYV1nJRqyq/ffX7xIAxTGS4t80ra2bOaAURPNX
+aRDOPK15Toz4/Ct89eiEohA8xLsrpssC5cEx7oS5g/XBfYDhOXnwtNf/LAgp44GY
+jhTjP/Oc+1z3Q3SJvgmCJbEgKtAlzE7nXYcagjvNUIiQa9FgJ5M58vN284Evfm5C
+omIbduiJEAWMjJk8is8gNoX8zCdX0TIoaznOiJ1g3dsuUygZ/oZ4UFx6Ph5Lsaaq
+XGg6iBHA730Wk3xxOG4ndlkWuLbvKd0vb2I/umQOEcuPoxR6B1NLB2f6GulEY9/P
+JCfdPX3NbZgJoI8nS7GqDaJB45DXiuAU02W1kHU9vTQOKp6UJKUiff4NF7zpefef
+UNpMCWeyYKGyS3U4Bl17pEI1n9iH1oNyrLQljDlAGjhIAlT/oF/Qbm7nNl/iTO30
+HMXIuXy8yFfMtjolEhOiwRCCK+ooQ000BGwM17yppVNa1i6lDiiHvG3FdVoV1HF1
+hOgHXS4j6fbWmSOwKewKHK2+GBJBxppuedLzuxCtxmQOwFBhAoiJokCXDo1nGE85
++/dJdyN/dZyBCQ3NyC8A7BB6nK3uUWO/b4wwWZjpxpyrHs6+SgrKIkj12rnaQuAI
+SNbVD++y/QJcUVXlqsJkVj4v04aWAwdNWlOrYT770xUzc8xUmPbsd89QgLfqyMFo
+hQARAQABiEkEGBEIAAkFAktYvpkCGwwACgkQ2kQg7SiJlcgRgwCghJWbAL8wv7Q0
+tS2vGOq9DWh1N28An2yMBPSf/WaHuo+mtsKqMk3enf6R
+=6gus
+-----END PGP PUBLIC KEY BLOCK-----
diff --git a/man/mkcephfs.8 b/man/mkcephfs.8
index f066d789462..51fed771f90 100644
--- a/man/mkcephfs.8
+++ b/man/mkcephfs.8
@@ -61,7 +61,7 @@ executing commands via SSH.
Use the given conf file instead of the default \fI/etc/ceph/ceph.conf\fP.
.TP
\fB\-k\fI /path/to/keyring\fR
-When \fB\-a\fR is used, we can special a location to copy the
+When \fB\-a\fR is used, we can specify a location to copy the
client.admin keyring, which is used to administer the cluster. The
default is \fI/etc/ceph/keyring\fP (or whatever is specified in the
config file).
diff --git a/qa/workunits/false.sh b/qa/workunits/false.sh
new file mode 100644
index 00000000000..8a961b329d5
--- /dev/null
+++ b/qa/workunits/false.sh
@@ -0,0 +1,3 @@
+#!/bin/sh -ex
+
+false
\ No newline at end of file
diff --git a/src/Makefile.am b/src/Makefile.am
index e53b68def91..5139235121e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -317,12 +317,12 @@ my_radosgw_src = \
rgw/rgw_access.cc \
rgw/rgw_op.cc \
rgw/rgw_rest.cc \
- rgw/rgw_rest_os.cc \
+ rgw/rgw_rest_swift.cc \
rgw/rgw_rest_s3.cc \
rgw/rgw_common.cc \
rgw/rgw_cache.cc \
- rgw/rgw_os.cc \
- rgw/rgw_os_auth.cc \
+ rgw/rgw_swift.cc \
+ rgw/rgw_swift_auth.cc \
rgw/rgw_formats.cc \
rgw/rgw_log.cc \
rgw/rgw_multi.cc \
@@ -885,6 +885,7 @@ libmds_a_SOURCES = \
mds/Dumper.cc \
mds/Resetter.cc \
mds/MDS.cc \
+ mds/flock.cc \
mds/locks.c \
mds/journal.cc \
mds/Server.cc \
@@ -1118,8 +1119,8 @@ noinst_HEADERS = \
include/rbd/librbd.h\
include/rbd/librbd.hpp\
logrotate.conf\
- mds/flock.h\
mds/inode_backtrace.h\
+ mds/flock.h\
mds/locks.c\
mds/locks.h\
mds/Anchor.h\
@@ -1333,11 +1334,11 @@ noinst_HEADERS = \
rgw/rgw_log.h\
rgw/rgw_multi.h\
rgw/rgw_op.h\
- rgw/rgw_os.h\
- rgw/rgw_os_auth.h\
+ rgw/rgw_swift.h\
+ rgw/rgw_swift_auth.h\
rgw/rgw_rados.h\
rgw/rgw_rest.h\
- rgw/rgw_rest_os.h\
+ rgw/rgw_rest_swift.h\
rgw/rgw_rest_s3.h\
rgw/rgw_tools.h\
rgw/rgw_bucket.h\
diff --git a/src/client/Client.cc b/src/client/Client.cc
index bb2cfb90c43..c5fa15abf79 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -2948,11 +2948,13 @@ void Client::handle_cap_import(Inode *in, MClientCaps *m)
m->get_caps(), m->get_seq(), m->get_mseq(), m->get_realm(),
CEPH_CAP_FLAG_AUTH);
- // reflush any/all caps
- if (in->cap_snaps.size())
- flush_snaps(in, true);
- if (in->flushing_caps)
- flush_caps(in, mds);
+ if (in->auth_cap && in->auth_cap->session->mds_num == mds) {
+ // reflush any/all caps (if we are now the auth_cap)
+ if (in->cap_snaps.size())
+ flush_snaps(in, true);
+ if (in->flushing_caps)
+ flush_caps(in, mds);
+ }
if (m->get_mseq() > in->exporting_mseq) {
ldout(cct, 5) << "handle_cap_import ino " << m->get_ino() << " mseq " << m->get_mseq()
@@ -3279,7 +3281,7 @@ void Client::unmount()
// NOTE: i'm assuming all caches are already flushing (because all files are closed).
- //clean up any unclosed files
+ // clean up any unclosed files
if (!fd_map.empty())
std::cerr << "Warning: Some files were not closed prior to unmounting;\n"
<< "Ceph is closing them now.\n";
@@ -3287,7 +3289,8 @@ void Client::unmount()
int fd = fd_map.begin()->first;
assert(fd_map.count(fd));
Fh *fh = fd_map[fd];
- _release(fh);
+ lderr(cct) << " destroying lost open file " << fh << " on " << *fh->inode << dendl;
+ _release_fh(fh);
fd_map.erase(fd);
}
@@ -4823,6 +4826,29 @@ Fh *Client::_create_fh(Inode *in, int flags, int cmode)
return f;
}
+int Client::_release_fh(Fh *f)
+{
+ //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
+ //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
+ Inode *in = f->inode;
+ ldout(cct, 5) << "_release_fh " << f << " mode " << f->mode << " on " << *in << dendl;
+
+ if (in->snapid == CEPH_NOSNAP) {
+ if (in->put_open_ref(f->mode)) {
+ _flush(in);
+ check_caps(in, false);
+ }
+ } else {
+ assert(in->snap_cap_refs > 0);
+ in->snap_cap_refs--;
+ }
+
+ put_inode( in );
+ delete f;
+
+ return 0;
+}
+
int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp, int uid, int gid)
{
int cmode = ceph_flags_to_mode(flags);
@@ -4866,9 +4892,6 @@ int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp, int uid, int gid)
return result;
}
-
-
-
int Client::close(int fd)
{
ldout(cct, 3) << "close enter(" << fd << ")" << dendl;
@@ -4878,36 +4901,12 @@ int Client::close(int fd)
assert(fd_map.count(fd));
Fh *fh = fd_map[fd];
- _release(fh);
+ _release_fh(fh);
fd_map.erase(fd);
ldout(cct, 3) << "close exit(" << fd << ")" << dendl;
return 0;
}
-int Client::_release(Fh *f)
-{
- //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
- //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
- Inode *in = f->inode;
- ldout(cct, 5) << "_release " << f << " mode " << f->mode << " on " << *in << dendl;
-
- if (in->snapid == CEPH_NOSNAP) {
- if (in->put_open_ref(f->mode)) {
- _flush(in);
- check_caps(in, false);
- }
- } else {
- assert(in->snap_cap_refs > 0);
- in->snap_cap_refs--;
- }
-
- put_inode( in );
- delete f;
-
- return 0;
-}
-
-
// ------------
// read, write
@@ -6735,7 +6734,7 @@ int Client::ll_release(Fh *fh)
tout(cct) << "ll_release" << std::endl;
tout(cct) << (unsigned long)fh << std::endl;
- _release(fh);
+ _release_fh(fh);
return 0;
}
diff --git a/src/client/Client.h b/src/client/Client.h
index c094082c92f..f4a5f1404f7 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -467,6 +467,7 @@ private:
void _ll_drop_pins();
Fh *_create_fh(Inode *in, int flags, int cmode);
+ int _release_fh(Fh *fh);
int _read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl);
int _read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl);
@@ -491,7 +492,6 @@ private:
int _removexattr(Inode *in, const char *nm, int uid=-1, int gid=-1);
int _open(Inode *in, int flags, mode_t mode, Fh **fhp, int uid=-1, int gid=-1);
int _create(Inode *in, const char *name, int flags, mode_t mode, Inode **inp, Fh **fhp, int uid=-1, int gid=-1);
- int _release(Fh *fh);
loff_t _lseek(Fh *fh, loff_t offset, int whence);
int _read(Fh *fh, int64_t offset, uint64_t size, bufferlist *bl);
int _write(Fh *fh, int64_t offset, uint64_t size, const char *buf);
diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc
index 9ea7e16d59c..aeaceec3f10 100644
--- a/src/client/fuse_ll.cc
+++ b/src/client/fuse_ll.cc
@@ -317,7 +317,7 @@ static void ceph_ll_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent,
static void ceph_ll_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
{
const struct fuse_ctx *ctx = fuse_req_ctx(req);
- Fh *fh;
+ Fh *fh = NULL;
int r = client->ll_open(fino_vino(ino), fi->flags, &fh, ctx->uid, ctx->gid);
if (r == 0) {
fi->fh = (long)fh;
@@ -443,7 +443,7 @@ static void ceph_ll_create(fuse_req_t req, fuse_ino_t parent, const char *name,
const struct fuse_ctx *ctx = fuse_req_ctx(req);
struct fuse_entry_param fe;
memset(&fe, 0, sizeof(fe));
- Fh *fh;
+ Fh *fh = NULL;
int r = client->ll_create(fino_vino(parent), name, mode, fi->flags, &fe.attr, &fh, ctx->uid, ctx->gid);
if (r == 0) {
fi->fh = (long)fh;
diff --git a/src/common/config.cc b/src/common/config.cc
index c75e066b51f..f618d9d2e40 100644
--- a/src/common/config.cc
+++ b/src/common/config.cc
@@ -433,6 +433,11 @@ struct config_option config_optionsp[] = {
OPTION(rgw_socket_path, OPT_STR, NULL), // path to unix domain socket, if not specified, rgw will not run as external fcgi
OPTION(rgw_op_thread_timeout, OPT_INT, 10*60),
OPTION(rgw_op_thread_suicide_timeout, OPT_INT, 60*60),
+ OPTION(rgw_thread_pool_size, OPT_INT, 100),
+ OPTION(rgw_maintenance_tick_interval, OPT_DOUBLE, 10.0),
+ OPTION(rgw_pools_preallocate_max, OPT_INT, 100),
+ OPTION(rgw_pools_preallocate_threshold, OPT_INT, 70),
+ OPTION(rgw_log_nonexistent_bucket, OPT_BOOL, false),
// see config.h
OPTION(internal_safe_to_start_threads, OPT_BOOL, false),
diff --git a/src/common/config.h b/src/common/config.h
index 1d4bfbfc9be..b072edbff32 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -567,6 +567,11 @@ public:
string rgw_socket_path;
int rgw_op_thread_timeout;
int rgw_op_thread_suicide_timeout;
+ int rgw_thread_pool_size;
+ double rgw_maintenance_tick_interval;
+ int rgw_pools_preallocate_max;
+ int rgw_pools_preallocate_threshold;
+ bool rgw_log_nonexistent_bucket;
// This will be set to true when it is safe to start threads.
// Once it is true, it will never change.
diff --git a/src/cosd.cc b/src/cosd.cc
index 82fa291901c..e2ce3cc347d 100644
--- a/src/cosd.cc
+++ b/src/cosd.cc
@@ -202,15 +202,13 @@ int main(int argc, const char **argv)
exit(0);
}
- int err = OSD::convertfs(g_conf->osd_data, g_conf->osd_journal);
- if (err < 0) {
- derr << TEXT_RED << " ** ERROR: error converting store " << g_conf->osd_data
- << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
- exit(1);
- }
if (convertfilestore) {
- derr << "Converted Filestore " << g_conf->osd_data << dendl;
- exit(0);
+ int err = OSD::convertfs(g_conf->osd_data, g_conf->osd_journal);
+ if (err < 0) {
+ derr << TEXT_RED << " ** ERROR: error converting store " << g_conf->osd_data
+ << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
+ exit(1);
+ }
}
string magic;
@@ -302,6 +300,14 @@ int main(int argc, const char **argv)
// Leave stderr open in case we need to report errors.
global_init_daemonize(g_ceph_context, CINIT_FLAG_NO_CLOSE_STDERR);
common_init_finish(g_ceph_context);
+
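+  // attempt filestore conversion now that global init and daemonization have finished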
+ int err = OSD::convertfs(g_conf->osd_data, g_conf->osd_journal);
+ if (err < 0) {
+ derr << TEXT_RED << " ** ERROR: error converting store " << g_conf->osd_data
+ << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
+ exit(1);
+ }
+
MonClient mc(g_ceph_context);
if (mc.build_initial_monmap() < 0)
return -1;
diff --git a/src/doc/object_store.dot b/src/doc/object_store.dot
deleted file mode 100644
index 08328f07a24..00000000000
--- a/src/doc/object_store.dot
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Rough outline of object store module dependencies
- *
- * build with
- * dot -Tpng < ./object_store.dot > object_store.png
- */
-
-digraph object_store {
- size="16,16";
- node [color=lightblue2, style=filled, fontname="Serif"];
-
- "testrados" -> "librados"
- "testradospp" -> "librados"
-
- "rbd" -> "librados"
-
- "radostool" -> "librados"
-
- "radosgw_admin" -> "rgw"
-
- "rgw" -> "librados"
-
- "radosacl" -> "librados"
-
- "librados" -> "objecter"
-
- "ObjectCacher" -> "Filer"
-
- "dumpjournal" -> "Journaler"
-
- "Journaler" -> "Filer"
-
- "SyntheticClient" -> "Filer"
- "SyntheticClient" -> "objecter"
-
- "Filer" -> "objecter"
-
- "objecter" -> "OSDMap"
-
- "cosd" -> "PG"
- "cosd" -> "ObjectStore"
-
- "crushtool" -> "CrushWrapper"
-
- "OSDMap" -> "CrushWrapper"
-
- "OSDMapTool" -> "OSDMap"
-
- "PG" -> "ReplicatedPG"
- "PG" -> "ObjectStore"
- "PG" -> "OSDMap"
-
- "ReplicatedPG" -> "ObjectStore"
- "ReplicatedPG" -> "OSDMap"
-
- "ObjectStore" -> "FileStore"
-
- "FileStore" -> "ext3"
- "FileStore" -> "ext4"
- "FileStore" -> "btrfs"
-}
diff --git a/src/include/object.h b/src/include/object.h
index bc56445fd1e..706caa6bffc 100644
--- a/src/include/object.h
+++ b/src/include/object.h
@@ -261,14 +261,15 @@ namespace __gnu_cxx {
struct hobject_t {
object_t oid;
+ string key;
snapid_t snap;
uint32_t hash;
hobject_t() : snap(0), hash(0) {}
- hobject_t(object_t oid, snapid_t snap, uint32_t hash) :
- oid(oid), snap(snap), hash(hash) {}
- hobject_t(const sobject_t &soid, uint32_t hash) :
- oid(soid.oid), snap(soid.snap), hash(hash) {}
+ hobject_t(object_t oid, const string &key, snapid_t snap, uint32_t hash) :
+ oid(oid), key(key), snap(snap), hash(hash) {}
+ hobject_t(const sobject_t &soid, const string &key, uint32_t hash) :
+ oid(soid.oid), key(key), snap(soid.snap), hash(hash) {}
/* Do not use when a particular hash function is needed */
explicit hobject_t(const sobject_t &o) :
@@ -279,9 +280,11 @@ struct hobject_t {
void swap(hobject_t &o) {
hobject_t temp(o);
o.oid = oid;
+ o.key = key;
o.snap = snap;
o.hash = hash;
oid = temp.oid;
+ key = temp.key;
snap = temp.snap;
hash = temp.hash;
}
@@ -291,8 +294,9 @@ struct hobject_t {
}
void encode(bufferlist& bl) const {
- __u8 version = 0;
+ __u8 version = 1;
::encode(version, bl);
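+    // v1 adds the key field right after the version byte; decode() reads it back in the same order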
+ ::encode(key, bl);
::encode(oid, bl);
::encode(snap, bl);
::encode(hash, bl);
@@ -300,6 +304,8 @@ struct hobject_t {
void decode(bufferlist::iterator& bl) {
__u8 version;
::decode(version, bl);
+ if (version >= 1)
+ ::decode(key, bl);
::decode(oid, bl);
::decode(snap, bl);
::decode(hash, bl);
diff --git a/src/include/rbd/librbd.h b/src/include/rbd/librbd.h
index 8389f98972a..886e04e8636 100644
--- a/src/include/rbd/librbd.h
+++ b/src/include/rbd/librbd.h
@@ -37,8 +37,7 @@ extern "C" {
typedef void *rbd_snap_t;
typedef void *rbd_image_t;
-typedef int (*librbd_copy_progress_fn_t)(uint64_t offset, uint64_t src_size,
- void *data);
+typedef int (*librbd_progress_fn_t)(uint64_t offset, uint64_t total, void *ptr);
typedef struct {
uint64_t id;
@@ -73,7 +72,7 @@ int rbd_resize(rbd_image_t image, uint64_t size);
int rbd_stat(rbd_image_t image, rbd_image_info_t *info, size_t infosize);
int rbd_copy(rbd_image_t image, rados_ioctx_t dest_io_ctx, const char *destname);
int rbd_copy_with_progress(rbd_image_t image, rados_ioctx_t dest_p,
- const char *destname, librbd_copy_progress_fn_t fn, void *data);
+ const char *destname, librbd_progress_fn_t fn, void *data);
/* snapshots */
int rbd_snap_list(rbd_image_t image, rbd_snap_info_t *snaps, int *max_snaps);
diff --git a/src/include/rbd/librbd.hpp b/src/include/rbd/librbd.hpp
index 956f072576e..6a685cccf8b 100644
--- a/src/include/rbd/librbd.hpp
+++ b/src/include/rbd/librbd.hpp
@@ -41,12 +41,12 @@ namespace librbd {
typedef rbd_image_info_t image_info_t;
-class ProgressContext
-{
-public:
- virtual ~ProgressContext();
- virtual int update_progress(uint64_t offset, uint64_t src_size) = 0;
-};
+ class ProgressContext
+ {
+ public:
+ virtual ~ProgressContext();
+ virtual int update_progress(uint64_t offset, uint64_t total) = 0;
+ };
class RBD
{
diff --git a/src/librbd.cc b/src/librbd.cc
index 3846cb7aeae..0a31e278c1f 100644
--- a/src/librbd.cc
+++ b/src/librbd.cc
@@ -1053,7 +1053,7 @@ public:
class CProgressContext : public librbd::ProgressContext
{
public:
- CProgressContext(librbd_copy_progress_fn_t fn, void *data)
+ CProgressContext(librbd_progress_fn_t fn, void *data)
: m_fn(fn), m_data(data)
{
}
@@ -1062,7 +1062,7 @@ public:
return m_fn(offset, src_size, m_data);
}
private:
- librbd_copy_progress_fn_t m_fn;
+ librbd_progress_fn_t m_fn;
void *m_data;
};
@@ -1761,7 +1761,7 @@ extern "C" int rbd_copy(rbd_image_t image, rados_ioctx_t dest_p, const char *des
}
extern "C" int rbd_copy_with_progress(rbd_image_t image, rados_ioctx_t dest_p,
- const char *destname, librbd_copy_progress_fn_t fn, void *data)
+ const char *destname, librbd_progress_fn_t fn, void *data)
{
librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
librados::IoCtx dest_io_ctx;
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index aa306adfcb8..528d1f8b970 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -129,6 +129,10 @@ ostream& operator<<(ostream& out, CInode& in)
if (in.is_freezing_inode()) out << " FREEZING=" << in.auth_pin_freeze_allowance;
if (in.is_frozen_inode()) out << " FROZEN";
+ inode_t *pi = in.get_projected_inode();
+ if (pi->is_truncating())
+ out << " truncating(" << pi->truncate_from << " to " << pi->truncate_size << ")";
+
// anchors
if (in.is_anchored())
out << " anc";
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index e07479e2fe4..5422f5b4992 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -5232,6 +5232,8 @@ void MDCache::_truncate_inode(CInode *in, LogSegment *ls)
<< pi->truncate_from << " -> " << pi->truncate_size
<< " on " << *in << dendl;
+ assert(pi->is_truncating());
+
in->auth_pin(this);
SnapRealm *realm = in->find_snaprealm();
diff --git a/src/mds/flock.cc b/src/mds/flock.cc
new file mode 100644
index 00000000000..b97aa013118
--- /dev/null
+++ b/src/mds/flock.cc
@@ -0,0 +1,488 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include <errno.h>
+
+#include "common/debug.h"
+#include "mdstypes.h"
+#include "mds/flock.h"
+
+bool ceph_lock_state_t::is_waiting(ceph_filelock &fl)
+{
+ multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+ while (p != waiting_locks.end()) {
+ if (p->second.start > fl.start)
+ return false;
+ if (p->second.length == fl.length &&
+ p->second.client == fl.client &&
+ p->second.pid == fl.pid &&
+ p->second.pid_namespace == fl.pid_namespace)
+ return true;
+ ++p;
+ }
+ return false;
+}
+
+void ceph_lock_state_t::remove_waiting(ceph_filelock& fl)
+{
+ multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+ while (p != waiting_locks.end()) {
+ if (p->second.start > fl.start)
+ return;
+ if (p->second.length == fl.length &&
+ p->second.client == fl.client &&
+ p->second.pid == fl.pid &&
+ p->second.pid_namespace == fl.pid_namespace) {
+ waiting_locks.erase(p);
+ return;
+ }
+ ++p;
+ }
+}
+
+bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock, bool wait_on_fail)
+{
+ dout(15) << "add_lock " << new_lock << dendl;
+ bool ret = false;
+ list<multimap<uint64_t, ceph_filelock>::iterator>
+ overlapping_locks, self_overlapping_locks, neighbor_locks;
+
+ // first, get any overlapping locks and split them into owned-by-us and not
+ if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
+ dout(15) << "got overlapping lock, splitting by owner" << dendl;
+ split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
+ }
+ if (!overlapping_locks.empty()) { //overlapping locks owned by others :(
+ if (CEPH_LOCK_EXCL == new_lock.type) {
+ //can't set, we want an exclusive
+ dout(15) << "overlapping lock, and this lock is exclusive, can't set"
+ << dendl;
+ if (wait_on_fail) {
+ waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+ }
+ } else { //shared lock, check for any exclusive locks blocking us
+ if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
+ dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
+ if (wait_on_fail) {
+ waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+ }
+ } else {
+ //yay, we can insert a shared lock
+ dout(15) << "inserting shared lock" << dendl;
+ adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
+ held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+ ret = true;
+ }
+ }
+ } else { //no overlapping locks except our own
+ adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
+ dout(15) << "no conflicts, inserting " << new_lock << dendl;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (new_lock.start, new_lock));
+ ret = true;
+ }
+ if (ret)
+ ++client_held_lock_counts[(client_t)new_lock.client];
+ else if (wait_on_fail)
+ ++client_waiting_lock_counts[(client_t)new_lock.client];
+ return ret;
+}
+
+void ceph_lock_state_t::look_for_lock(ceph_filelock& testing_lock)
+{
+ list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
+ self_overlapping_locks;
+ if (get_overlapping_locks(testing_lock, overlapping_locks)) {
+ split_by_owner(testing_lock, overlapping_locks, self_overlapping_locks);
+ }
+ if (!overlapping_locks.empty()) { //somebody else owns overlapping lock
+ if (CEPH_LOCK_EXCL == testing_lock.type) { //any lock blocks it
+ testing_lock = (*overlapping_locks.begin())->second;
+ } else {
+ ceph_filelock *blocking_lock;
+ if ((blocking_lock = contains_exclusive_lock(overlapping_locks))) {
+ testing_lock = *blocking_lock;
+ } else { //nothing blocking!
+ testing_lock.type = CEPH_LOCK_UNLOCK;
+ }
+ }
+ return;
+ }
+ //if we get here, only our own locks block
+ testing_lock.type = CEPH_LOCK_UNLOCK;
+}
+
+void ceph_lock_state_t::remove_lock(ceph_filelock removal_lock,
+ list<ceph_filelock>& activated_locks)
+{
+ list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
+ self_overlapping_locks, crossed_waiting_locks;
+ if (get_overlapping_locks(removal_lock, overlapping_locks)) {
+ dout(15) << "splitting by owner" << dendl;
+ split_by_owner(removal_lock, overlapping_locks, self_overlapping_locks);
+ } else dout(15) << "attempt to remove lock at " << removal_lock.start
+ << " but no locks there!" << dendl;
+ bool remove_to_end = (0 == removal_lock.length);
+ bool old_lock_to_end;
+ uint64_t removal_start = removal_lock.start;
+ uint64_t removal_end = removal_start + removal_lock.length - 1;
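+  // removal_end is only meaningful when the removed lock has a finite length (remove_to_end is false)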
+ uint64_t old_lock_end;
+ __s64 old_lock_client = 0;
+ ceph_filelock *old_lock;
+
+ dout(15) << "examining " << self_overlapping_locks.size()
+ << " self-overlapping locks for removal" << dendl;
+ for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = self_overlapping_locks.begin();
+ iter != self_overlapping_locks.end();
+ ++iter) {
+ dout(15) << "self overlapping lock " << (*iter)->second << dendl;
+ old_lock = &(*iter)->second;
+ old_lock_to_end = (0 == old_lock->length);
+ old_lock_end = old_lock->start + old_lock->length - 1;
+ old_lock_client = old_lock->client;
+ if (remove_to_end) {
+ if (old_lock->start < removal_start) {
+ old_lock->length = removal_start - old_lock->start;
+ } else {
+ dout(15) << "erasing " << (*iter)->second << dendl;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ } else if (old_lock_to_end) {
+ ceph_filelock append_lock = *old_lock;
+ append_lock.start = removal_end+1;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (append_lock.start, append_lock));
+ ++client_held_lock_counts[(client_t)old_lock->client];
+ if (old_lock->start >= removal_start) {
+ dout(15) << "erasing " << (*iter)->second << dendl;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ } else old_lock->length = removal_start - old_lock->start;
+ } else {
+ if (old_lock_end > removal_end) {
+ ceph_filelock append_lock = *old_lock;
+ append_lock.start = removal_end + 1;
+ append_lock.length = old_lock_end - append_lock.start + 1;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (append_lock.start, append_lock));
+ ++client_held_lock_counts[(client_t)old_lock->client];
+ }
+ if (old_lock->start < removal_start) {
+ old_lock->length = removal_start - old_lock->start;
+ } else {
+ dout(15) << "erasing " << (*iter)->second << dendl;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ }
+ if (!client_held_lock_counts[old_lock_client]) {
+ client_held_lock_counts.erase(old_lock_client);
+ }
+ }
+}
+
+bool ceph_lock_state_t::remove_all_from (client_t client)
+{
+ bool cleared_any = false;
+ if (client_held_lock_counts.count(client)) {
+ remove_all_from(client, held_locks);
+ client_held_lock_counts.erase(client);
+ cleared_any = true;
+ }
+ if (client_waiting_lock_counts.count(client)) {
+ remove_all_from(client, waiting_locks);
+ client_waiting_lock_counts.erase(client);
+ }
+ return cleared_any;
+}
+
+void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::iterator> old_locks,
+ ceph_filelock& new_lock,
+ list<multimap<uint64_t, ceph_filelock>::iterator>
+ neighbor_locks)
+{
+ dout(15) << "adjust_locks" << dendl;
+ bool new_lock_to_end = (0 == new_lock.length);
+ bool old_lock_to_end;
+ uint64_t new_lock_start = new_lock.start;
+ uint64_t new_lock_end = new_lock.start + new_lock.length - 1;
+ uint64_t old_lock_start, old_lock_end;
+ __s64 old_lock_client = 0;
+ ceph_filelock *old_lock;
+ for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = old_locks.begin();
+ iter != old_locks.end();
+ ++iter) {
+ old_lock = &(*iter)->second;
+ dout(15) << "adjusting lock: " << *old_lock << dendl;
+ old_lock_to_end = (0 == old_lock->length);
+ old_lock_start = old_lock->start;
+ old_lock_end = old_lock->start + old_lock->length - 1;
+ new_lock_start = new_lock.start;
+ new_lock_end = new_lock.start + new_lock.length - 1;
+ old_lock_client = old_lock->client;
+ if (new_lock_to_end || old_lock_to_end) {
+ //special code path to deal with a length set at 0
+ dout(15) << "one lock extends forever" << dendl;
+ if (old_lock->type == new_lock.type) {
+ //just unify them in new lock, remove old lock
+ dout(15) << "same lock type, unifying" << dendl;
+ new_lock.start = (new_lock_start < old_lock_start) ? new_lock_start :
+ old_lock_start;
+ new_lock.length = 0;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ } else { //not same type, have to keep any remains of old lock around
+ dout(15) << "shrinking old lock" << dendl;
+ if (new_lock_to_end) {
+ if (old_lock_start < new_lock_start) {
+ old_lock->length = new_lock_start - old_lock_start;
+ } else {
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ } else { //old lock extends past end of new lock
+ ceph_filelock appended_lock = *old_lock;
+ appended_lock.start = new_lock_end + 1;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (appended_lock.start, appended_lock));
+ ++client_held_lock_counts[(client_t)old_lock->client];
+ if (old_lock_start < new_lock_start) {
+ old_lock->length = new_lock_start - old_lock_start;
+ } else {
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ }
+ }
+ } else {
+ if (old_lock->type == new_lock.type) { //just merge them!
+ dout(15) << "merging locks, they're the same type" << dendl;
+ new_lock.start = (old_lock_start < new_lock_start ) ? old_lock_start :
+ new_lock_start;
+ uint64_t new_end = (new_lock_end > old_lock_end) ? new_lock_end :
+ old_lock_end;
+ new_lock.length = new_end - new_lock.start + 1;
+ dout(15) << "erasing lock " << (*iter)->second << dendl;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ } else { //we'll have to update sizes and maybe make new locks
+ dout(15) << "locks aren't same type, changing sizes" << dendl;
+ if (old_lock_end > new_lock_end) { //add extra lock after new_lock
+ ceph_filelock appended_lock = *old_lock;
+ appended_lock.start = new_lock_end + 1;
+ appended_lock.length = old_lock_end - appended_lock.start + 1;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (appended_lock.start, appended_lock));
+ ++client_held_lock_counts[(client_t)old_lock->client];
+ }
+ if (old_lock_start < new_lock_start) {
+ old_lock->length = new_lock_start - old_lock_start;
+ } else { //old_lock starts inside new_lock, so remove it
+ //if it extended past new_lock_end it's been replaced
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ }
+ }
+ if (!client_held_lock_counts[old_lock_client]) {
+ client_held_lock_counts.erase(old_lock_client);
+ }
+ }
+
+ //make sure to coalesce neighboring locks
+ for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = neighbor_locks.begin();
+ iter != neighbor_locks.end();
+ ++iter) {
+ old_lock = &(*iter)->second;
+ old_lock_client = old_lock->client;
+ dout(15) << "lock to coalesce: " << *old_lock << dendl;
+ /* because if it's a neighboring lock there can't be any self-overlapping
+ locks that cover it */
+ if (old_lock->type == new_lock.type) { //merge them
+ if (0 == new_lock.length) {
+ if (old_lock->start + old_lock->length == new_lock.start) {
+ new_lock.start = old_lock->start;
+ } else assert(0); /* if there's no end to new_lock, the neighbor
+ HAS TO be on the left side */
+ } else if (0 == old_lock->length) {
+ if (new_lock.start + new_lock.length == old_lock->start) {
+ new_lock.length = 0;
+ } else assert(0); //same as before, but reversed
+ } else {
+ if (old_lock->start + old_lock->length == new_lock.start) {
+ new_lock.start = old_lock->start;
+ new_lock.length = old_lock->length + new_lock.length;
+ } else if (new_lock.start + new_lock.length == old_lock->start) {
+ new_lock.length = old_lock->length + new_lock.length;
+ }
+ }
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ if (!client_held_lock_counts[old_lock_client]) {
+ client_held_lock_counts.erase(old_lock_client);
+ }
+ }
+}
+
+void ceph_lock_state_t::remove_all_from(client_t client,
+ multimap<uint64_t,
+ ceph_filelock>& locks)
+{
+ multimap<uint64_t, ceph_filelock>::iterator iter = locks.begin();
+ while (iter != locks.end()) {
+ if ((client_t)iter->second.client == client) {
+ locks.erase(iter++);
+ } else ++iter;
+ }
+}
+
+multimap<uint64_t, ceph_filelock>::iterator
+ceph_lock_state_t::get_lower_bound(uint64_t start,
+ multimap<uint64_t, ceph_filelock>& lock_map)
+{
+ multimap<uint64_t, ceph_filelock>::iterator lower_bound =
+ lock_map.lower_bound(start);
+ if ((lower_bound->first != start)
+ && (start != 0)
+ && (lower_bound != lock_map.begin())) --lower_bound;
+ if (lock_map.end() == lower_bound)
+ dout(15) << "get_lower_dout(15)eturning end()" << dendl;
+ else dout(15) << "get_lower_bound returning iterator pointing to "
+ << lower_bound->second << dendl;
+ return lower_bound;
+}
+
+multimap<uint64_t, ceph_filelock>::iterator
+ceph_lock_state_t::get_last_before(uint64_t end,
+ multimap<uint64_t, ceph_filelock>& lock_map)
+{
+ multimap<uint64_t, ceph_filelock>::iterator last =
+ lock_map.upper_bound(end);
+ if (last != lock_map.begin()) --last;
+ if (lock_map.end() == last)
+ dout(15) << "get_last_before returning end()" << dendl;
+ else dout(15) << "get_last_before returning iterator pointing to "
+ << last->second << dendl;
+ return last;
+}
+
+bool ceph_lock_state_t::share_space(
+ multimap<uint64_t, ceph_filelock>::iterator& iter,
+ uint64_t start, uint64_t end)
+{
+ bool ret = ((iter->first >= start && iter->first <= end) ||
+ ((iter->first < start) &&
+ (((iter->first + iter->second.length - 1) >= start) ||
+ (0 == iter->second.length))));
+ dout(15) << "share_space got start: " << start << ", end: " << end
+ << ", lock: " << iter->second << ", returning " << ret << dendl;
+ return ret;
+}
+
+bool ceph_lock_state_t::get_overlapping_locks(ceph_filelock& lock,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> & overlaps,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> *self_neighbors)
+{
+ dout(15) << "get_overlapping_locks" << dendl;
+ // create a lock starting one earlier and ending one later
+ // to check for neighbors
+ ceph_filelock neighbor_check_lock = lock;
+ if (neighbor_check_lock.start != 0) {
+ neighbor_check_lock.start = neighbor_check_lock.start - 1;
+ if (neighbor_check_lock.length)
+ neighbor_check_lock.length = neighbor_check_lock.length + 2;
+ } else {
+ if (neighbor_check_lock.length)
+ neighbor_check_lock.length = neighbor_check_lock.length + 1;
+ }
+ //find the last held lock starting at the point after lock
+ uint64_t endpoint = lock.start;
+ if (lock.length) {
+ endpoint += lock.length;
+ } else {
+ endpoint = uint64_t(-1); // max offset
+ }
+ multimap<uint64_t, ceph_filelock>::iterator iter =
+ get_last_before(endpoint, held_locks);
+ bool cont = iter != held_locks.end();
+ while(cont) {
+ if (share_space(iter, lock)) {
+ overlaps.push_front(iter);
+ } else if (self_neighbors &&
+ (neighbor_check_lock.client == iter->second.client) &&
+ (neighbor_check_lock.pid == iter->second.pid) &&
+ share_space(iter, neighbor_check_lock)) {
+ self_neighbors->push_front(iter);
+ }
+ if ((iter->first < lock.start) && (CEPH_LOCK_EXCL == iter->second.type)) {
+ //can't be any more overlapping locks or they'd interfere with this one
+ cont = false;
+ } else if (held_locks.begin() == iter) cont = false;
+ else --iter;
+ }
+ return !overlaps.empty();
+}
+
+bool ceph_lock_state_t::get_waiting_overlaps(ceph_filelock& lock,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator>&
+ overlaps)
+{
+ dout(15) << "get_waiting_overlaps" << dendl;
+ multimap<uint64_t, ceph_filelock>::iterator iter =
+ get_last_before(lock.start + lock.length - 1, waiting_locks);
+ bool cont = iter != waiting_locks.end();
+ while(cont) {
+ if (share_space(iter, lock)) overlaps.push_front(iter);
+ if (waiting_locks.begin() == iter) cont = false;
+ --iter;
+ }
+ return !overlaps.empty();
+}
+
+void ceph_lock_state_t::split_by_owner(ceph_filelock& owner,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator>& locks,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator>&
+ owned_locks)
+{
+ list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = locks.begin();
+ dout(15) << "owner lock: " << owner << dendl;
+ while (iter != locks.end()) {
+ dout(15) << "comparing to " << (*iter)->second << dendl;
+ if ((*iter)->second.client == owner.client &&
+ (*iter)->second.pid_namespace == owner.pid_namespace &&
+ (*iter)->second.pid == owner.pid) {
+ dout(15) << "success, pushing to owned_locks" << dendl;
+ owned_locks.push_back(*iter);
+ iter = locks.erase(iter);
+ } else {
+ dout(15) << "failure, something not equal in this group "
+ << (*iter)->second.client << ":" << owner.client << ","
+ << (*iter)->second.pid_namespace << ":" << owner.pid_namespace
+ << "," << (*iter)->second.pid << ":" << owner.pid << dendl;
+ ++iter;
+ }
+ }
+}
+
+ceph_filelock *
+ceph_lock_state_t::contains_exclusive_lock(list<multimap<uint64_t,
+ ceph_filelock>::iterator>& locks)
+{
+ for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = locks.begin();
+ iter != locks.end();
+ ++iter) {
+ if (CEPH_LOCK_EXCL == (*iter)->second.type) return &(*iter)->second;
+ }
+ return NULL;
+}
diff --git a/src/mds/flock.h b/src/mds/flock.h
index 69228b46c90..133feabee66 100644
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ -26,239 +26,63 @@ inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
l.type == r.type;
}
-struct ceph_lock_state_t {
+class ceph_lock_state_t {
+public:
multimap<uint64_t, ceph_filelock> held_locks; // current locks
multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
// both of the above are keyed by starting offset
map<client_t, int> client_held_lock_counts;
map<client_t, int> client_waiting_lock_counts;
- bool is_waiting(ceph_filelock &fl) {
- multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
- while (p != waiting_locks.end()) {
- if (p->second.start > fl.start)
- return false;
- if (p->second.length == fl.length &&
- p->second.client == fl.client &&
- p->second.pid == fl.pid &&
- p->second.pid_namespace == fl.pid_namespace)
- return true;
- ++p;
- }
- return false;
- }
- void remove_waiting(ceph_filelock& fl) {
- multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
- while (p != waiting_locks.end()) {
- if (p->second.start > fl.start)
- return;
- if (p->second.length == fl.length &&
- p->second.client == fl.client &&
- p->second.pid == fl.pid &&
- p->second.pid_namespace == fl.pid_namespace) {
- waiting_locks.erase(p);
- return;
- }
- ++p;
- }
- }
+ /**
+ * Check if a lock is on the waiting_locks list.
+ *
+ * @param fl The filelock to check for
+ * @returns True if the lock is waiting, false otherwise
+ */
+ bool is_waiting(ceph_filelock &fl);
+ /**
+ * Remove a lock from the waiting_locks list
+ *
+ * @param fl The filelock to remove
+ */
+ void remove_waiting(ceph_filelock& fl);
/*
* Try to set a new lock. If it's blocked and wait_on_fail is true,
* add the lock to waiting_locks.
* The lock needs to be of type CEPH_LOCK_EXCL or CEPH_LOCK_SHARED.
+ * This may merge previous locks, or convert the type of already-owned
+ * locks.
*
- * If we already added ourselves to waiting_locks, did_wait will be
- * true. If did_wait==true and we're not on the list, that means we
- * were canceled and we should return an error.
+ * @param new_lock The lock to set
+ * @param wait_on_fail whether to wait until the lock can be set.
+ * Otherwise it fails immediately when blocked.
*
- * Returns true if set, false if not set.
+ * @returns true if set, false if not set.
*/
- bool add_lock(ceph_filelock& new_lock, bool wait_on_fail) {
- dout(15) << "add_lock " << new_lock << dendl;
- bool ret = false;
- list<multimap<uint64_t, ceph_filelock>::iterator>
- overlapping_locks, self_overlapping_locks, neighbor_locks;
-
- // first, get any overlapping locks and split them into owned-by-us and not
- if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
- dout(15) << "got overlapping lock, splitting by owner" << dendl;
- split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
- }
- if (!overlapping_locks.empty()) { //overlapping locks owned by others :(
- if (CEPH_LOCK_EXCL == new_lock.type) {
- //can't set, we want an exclusive
- dout(15) << "overlapping lock, and this lock is exclusive, can't set"
- << dendl;
- if (wait_on_fail) {
- waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
- }
- } else { //shared lock, check for any exclusive locks blocking us
- if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
- dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
- if (wait_on_fail) {
- waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
- }
- } else {
- //yay, we can insert a shared lock
- dout(15) << "inserting shared lock" << dendl;
- adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
- held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
- ret = true;
- }
- }
- } else { //no overlapping locks except our own
- adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
- dout(15) << "no conflicts, inserting " << new_lock << dendl;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (new_lock.start, new_lock));
- ret = true;
- }
- if (ret)
- ++client_held_lock_counts[(client_t)new_lock.client];
- else if (wait_on_fail)
- ++client_waiting_lock_counts[(client_t)new_lock.client];
- return ret;
- }
-
- void look_for_lock(ceph_filelock& testing_lock) {
- list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
- self_overlapping_locks;
- if (get_overlapping_locks(testing_lock, overlapping_locks)) {
- split_by_owner(testing_lock, overlapping_locks, self_overlapping_locks);
- }
- if (!overlapping_locks.empty()) { //somebody else owns overlapping lock
- if (CEPH_LOCK_EXCL == testing_lock.type) { //any lock blocks it
- testing_lock = (*overlapping_locks.begin())->second;
- } else {
- ceph_filelock *blocking_lock;
- if ((blocking_lock = contains_exclusive_lock(overlapping_locks))) {
- testing_lock = *blocking_lock;
- } else { //nothing blocking!
- testing_lock.type = CEPH_LOCK_UNLOCK;
- }
- }
- return;
- }
- //if we get here, only our own locks block
- testing_lock.type = CEPH_LOCK_UNLOCK;
- }
+ bool add_lock(ceph_filelock& new_lock, bool wait_on_fail);
+ /**
+ * See if a lock is blocked by existing locks. If the lock is blocked,
+ * it will be set to the value of the first blocking lock. Otherwise,
+ * it will be returned unchanged, except for setting the type field
+ * to CEPH_LOCK_UNLOCK.
+ *
+ * @param testing_lock The lock to check for conflicts on.
+ */
+ void look_for_lock(ceph_filelock& testing_lock);
/*
* Remove lock(s) described in old_lock. This may involve splitting a
* previous lock or making a previous lock smaller.
+ *
+ * @param removal_lock The lock to remove
+ * @param activated_locks A return parameter, holding activated wait locks.
*/
void remove_lock(ceph_filelock removal_lock,
- list<ceph_filelock>& activated_locks) {
- list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
- self_overlapping_locks, crossed_waiting_locks;
- if (get_overlapping_locks(removal_lock, overlapping_locks)) {
- dout(15) << "splitting by owner" << dendl;
- split_by_owner(removal_lock, overlapping_locks, self_overlapping_locks);
- } else dout(15) << "attempt to remove lock at " << removal_lock.start
- << " but no locks there!" << dendl;
- bool remove_to_end = (0 == removal_lock.length);
- bool old_lock_to_end;
- uint64_t removal_start = removal_lock.start;
- uint64_t removal_end = removal_start + removal_lock.length - 1;
- uint64_t old_lock_end;
- __s64 old_lock_client = 0;
- ceph_filelock *old_lock;
-
- dout(15) << "examining " << self_overlapping_locks.size()
- << " self-overlapping locks for removal" << dendl;
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = self_overlapping_locks.begin();
- iter != self_overlapping_locks.end();
- ++iter) {
- dout(15) << "self overlapping lock " << (*iter)->second << dendl;
- old_lock = &(*iter)->second;
- old_lock_to_end = (0 == old_lock->length);
- old_lock_end = old_lock->start + old_lock->length - 1;
- old_lock_client = old_lock->client;
- if (remove_to_end) {
- if (old_lock->start < removal_start) {
- old_lock->length = removal_start - old_lock->start;
- } else {
- dout(15) << "erasing " << (*iter)->second << dendl;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- } else if (old_lock_to_end) {
- ceph_filelock append_lock = *old_lock;
- append_lock.start = removal_end+1;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (append_lock.start, append_lock));
- ++client_held_lock_counts[(client_t)old_lock->client];
- if (old_lock->start >= removal_start) {
- dout(15) << "erasing " << (*iter)->second << dendl;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- } else old_lock->length = removal_start - old_lock->start;
- } else {
- if (old_lock_end > removal_end) {
- ceph_filelock append_lock = *old_lock;
- append_lock.start = removal_end + 1;
- append_lock.length = old_lock_end - append_lock.start + 1;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (append_lock.start, append_lock));
- ++client_held_lock_counts[(client_t)old_lock->client];
- }
- if (old_lock->start < removal_start) {
- old_lock->length = removal_start - old_lock->start;
- } else {
- dout(15) << "erasing " << (*iter)->second << dendl;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- }
- if (!client_held_lock_counts[old_lock_client]) {
- client_held_lock_counts.erase(old_lock_client);
- }
- }
-
- /* okay, we've removed the locks, but removing them might allow some
- * other waiting locks to come through */
- if (get_waiting_overlaps(removal_lock, crossed_waiting_locks)) {
- /*let's do this the SUPER lazy way for now. Should work out something
- that's slightly less slow and wasteful, though.
- 1) Remove lock from waiting_locks.
- 2) attempt to insert lock via add_lock
- 3) Add to success list if we get back "true"
-
- In the future, should probably set this up to detect some
- guaranteed blocks and do fewer map lookups.
- */
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = crossed_waiting_locks.begin();
- iter != crossed_waiting_locks.end();
- ++iter) {
- ceph_filelock cur_lock = (*iter)->second;
- waiting_locks.erase(*iter);
- --client_waiting_lock_counts[(client_t)cur_lock.client];
- if (!client_waiting_lock_counts[(client_t)cur_lock.client]) {
- client_waiting_lock_counts.erase((client_t)cur_lock.client);
- }
- if (add_lock(cur_lock, true))
- activated_locks.push_back(cur_lock);
- }
- }
- }
-
- bool remove_all_from (client_t client) {
- bool cleared_any = false;
- if (client_held_lock_counts.count(client)) {
- remove_all_from(client, held_locks);
- client_held_lock_counts.erase(client);
- cleared_any = true;
- }
- if (client_waiting_lock_counts.count(client)) {
- remove_all_from(client, waiting_locks);
- client_waiting_lock_counts.erase(client);
- }
- return cleared_any;
- }
+ list<ceph_filelock>& activated_locks);
+ bool remove_all_from(client_t client);
private:
/**
* Adjust old locks owned by a single process so that process can set
@@ -270,182 +94,32 @@ private:
* This function should only be called once you know the lock will be
* inserted, as it DOES adjust new_lock. You can call this function
* on an empty list, in which case it does nothing.
- * This function does not remove elements from the list, so regard the list
+ * This function does not remove elements from old_locks, so regard the list
* as bad information following function invocation.
*
- * new_lock: The new lock the process has requested.
- * old_locks: list of all locks currently held by same
+ * @param new_lock The new lock the process has requested.
+ * @param old_locks list of all locks currently held by same
* client/process that overlap new_lock.
- * neighbor_locks: locks owned by same process that neighbor new_lock on
+ * @param neighbor_locks locks owned by same process that neighbor new_lock on
* left or right side.
*/
void adjust_locks(list<multimap<uint64_t, ceph_filelock>::iterator> old_locks,
- ceph_filelock& new_lock,
- list<multimap<uint64_t, ceph_filelock>::iterator>
- neighbor_locks) {
- dout(15) << "adjust_locks" << dendl;
- bool new_lock_to_end = (0 == new_lock.length);
- bool old_lock_to_end;
- uint64_t new_lock_start = new_lock.start;
- uint64_t new_lock_end = new_lock.start + new_lock.length - 1;
- uint64_t old_lock_start, old_lock_end;
- __s64 old_lock_client = 0;
- ceph_filelock *old_lock;
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = old_locks.begin();
- iter != old_locks.end();
- ++iter) {
- old_lock = &(*iter)->second;
- dout(15) << "adjusting lock: " << *old_lock << dendl;
- old_lock_to_end = (0 == old_lock->length);
- old_lock_start = old_lock->start;
- old_lock_end = old_lock->start + old_lock->length - 1;
- new_lock_start = new_lock.start;
- new_lock_end = new_lock.start + new_lock.length - 1;
- old_lock_client = old_lock->client;
- if (new_lock_to_end || old_lock_to_end) {
- //special code path to deal with a length set at 0
- dout(15) << "one lock extends forever" << dendl;
- if (old_lock->type == new_lock.type) {
- //just unify them in new lock, remove old lock
- dout(15) << "same lock type, unifying" << dendl;
- new_lock.start = (new_lock_start < old_lock_start) ? new_lock_start :
- old_lock_start;
- new_lock.length = 0;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- } else { //not same type, have to keep any remains of old lock around
- dout(15) << "shrinking old lock" << dendl;
- if (new_lock_to_end) {
- if (old_lock_start < new_lock_start) {
- old_lock->length = new_lock_start - old_lock_start;
- } else {
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- } else { //old lock extends past end of new lock
- ceph_filelock appended_lock = *old_lock;
- appended_lock.start = new_lock_end + 1;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (appended_lock.start, appended_lock));
- ++client_held_lock_counts[(client_t)old_lock->client];
- if (old_lock_start < new_lock_start) {
- old_lock->length = new_lock_start - old_lock_start;
- } else {
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- }
- }
- } else {
- if (old_lock->type == new_lock.type) { //just merge them!
- dout(15) << "merging locks, they're the same type" << dendl;
- new_lock.start = (old_lock_start < new_lock_start ) ? old_lock_start :
- new_lock_start;
- int new_end = (new_lock_end > old_lock_end) ? new_lock_end :
- old_lock_end;
- new_lock.length = new_end - new_lock.start + 1;
- dout(15) << "erasing lock " << (*iter)->second << dendl;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- } else { //we'll have to update sizes and maybe make new locks
- dout(15) << "locks aren't same type, changing sizes" << dendl;
- if (old_lock_end > new_lock_end) { //add extra lock after new_lock
- ceph_filelock appended_lock = *old_lock;
- appended_lock.start = new_lock_end + 1;
- appended_lock.length = old_lock_end - appended_lock.start + 1;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (appended_lock.start, appended_lock));
- ++client_held_lock_counts[(client_t)old_lock->client];
- }
- if (old_lock_start < new_lock_start) {
- old_lock->length = new_lock_start - old_lock_start;
- } else { //old_lock starts inside new_lock, so remove it
- //if it extended past new_lock_end it's been replaced
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- }
- }
- if (!client_held_lock_counts[old_lock_client]) {
- client_held_lock_counts.erase(old_lock_client);
- }
- }
-
- //make sure to coalesce neighboring locks
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = neighbor_locks.begin();
- iter != neighbor_locks.end();
- ++iter) {
- old_lock = &(*iter)->second;
- old_lock_client = old_lock->client;
- dout(15) << "lock to coalesce: " << *old_lock << dendl;
- /* because if it's a neibhoring lock there can't be any self-overlapping
- locks that covered it */
- if (old_lock->type == new_lock.type) { //merge them
- if (0 == new_lock.length) {
- if (old_lock->start + old_lock->length == new_lock.start) {
- new_lock.start = old_lock->start;
- } else assert(0); /* if there's no end to new_lock, the neighbor
- HAS TO be to left side */
- } else if (0 == old_lock->length) {
- if (new_lock.start + new_lock.length == old_lock->start) {
- new_lock.length = 0;
- } else assert(0); //same as before, but reversed
- } else {
- if (old_lock->start + old_lock->length == new_lock.start) {
- new_lock.start = old_lock->start;
- new_lock.length = old_lock->length + new_lock.length;
- } else if (new_lock.start + new_lock.length == old_lock->start) {
- new_lock.length = old_lock->length + new_lock.length;
- }
- }
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- if (!client_held_lock_counts[old_lock_client]) {
- client_held_lock_counts.erase(old_lock_client);
- }
- }
- }
+ ceph_filelock& new_lock,
+ list<multimap<uint64_t, ceph_filelock>::iterator>
+ neighbor_locks);
//this won't reset the counter map value, do that yourself
- void remove_all_from(client_t client, multimap<uint64_t, ceph_filelock>& locks) {
- multimap<uint64_t, ceph_filelock>::iterator iter = locks.begin();
- while (iter != locks.end()) {
- if ((client_t)iter->second.client == client) {
- locks.erase(iter++);
- } else ++iter;
- }
- }
+ void remove_all_from(client_t client,
+ multimap<uint64_t, ceph_filelock>& locks);
//get last lock prior to start position
multimap<uint64_t, ceph_filelock>::iterator
- get_lower_bound(uint64_t start, multimap<uint64_t, ceph_filelock>& lock_map) {
- multimap<uint64_t, ceph_filelock>::iterator lower_bound =
- lock_map.lower_bound(start);
- if ((lower_bound->first != start)
- && (start != 0)
- && (lower_bound != lock_map.begin())) --lower_bound;
- if (lock_map.end() == lower_bound)
- dout(15) << "get_lower_dout(15)eturning end()" << dendl;
- else dout(15) << "get_lower_bound returning iterator pointing to "
- << lower_bound->second << dendl;
- return lower_bound;
- }
-
+ get_lower_bound(uint64_t start,
+ multimap<uint64_t, ceph_filelock>& lock_map);
//get latest-starting lock that goes over the byte "end"
multimap<uint64_t, ceph_filelock>::iterator
- get_last_before(uint64_t end, multimap<uint64_t, ceph_filelock>& lock_map) {
- multimap<uint64_t, ceph_filelock>::iterator last =
- lock_map.upper_bound(end);
- if (last != lock_map.begin()) --last;
- if (lock_map.end() == last)
- dout(15) << "get_last_before returning end()" << dendl;
- else dout(15) << "get_last_before returning iterator pointing to "
- << last->second << dendl;
- return last;
- }
+ get_last_before(uint64_t end,
+ multimap<uint64_t, ceph_filelock>& lock_map);
/*
* See if an iterator's lock covers any of the same bounds as a given range
@@ -454,25 +128,18 @@ private:
* If the length is 0, the lock covers from "start" to the end of the file.
*/
bool share_space(multimap<uint64_t, ceph_filelock>::iterator& iter,
- uint64_t start, uint64_t end) {
- bool ret = ((iter->first >= start && iter->first <= end) ||
- ((iter->first < start) &&
- (((iter->first + iter->second.length - 1) >= start) ||
- (0 == iter->second.length))));
- dout(15) << "share_space got start: " << start << ", end: " << end
- << ", lock: " << iter->second << ", returning " << ret << dendl;
- return ret;
- }
+ uint64_t start, uint64_t end);
+
bool share_space(multimap<uint64_t, ceph_filelock>::iterator& iter,
- ceph_filelock& lock) {
+ ceph_filelock &lock) {
uint64_t end = lock.start;
- if (lock.length)
+ if (lock.length) {
end += lock.length - 1;
- else // zero length means to end-of-file
+ } else { // zero length means end of file
end = uint64_t(-1);
+ }
return share_space(iter, lock.start, end);
}
-
/*
*get a list of all locks overlapping with the given lock's range
* lock: the lock to compare with.
@@ -480,47 +147,11 @@ private:
* Returns: true if at least one lock overlaps.
*/
bool get_overlapping_locks(ceph_filelock& lock,
- list<multimap<uint64_t, ceph_filelock>::iterator> & overlaps,
- list<multimap<uint64_t, ceph_filelock>::iterator> *self_neighbors) {
- dout(15) << "get_overlapping_locks" << dendl;
- // create a lock starting one earlier and ending one later
- // to check for neighbors
- ceph_filelock neighbor_check_lock = lock;
- if (neighbor_check_lock.start != 0) {
- neighbor_check_lock.start = neighbor_check_lock.start - 1;
- if (neighbor_check_lock.length)
- neighbor_check_lock.length = neighbor_check_lock.length + 2;
- } else {
- if (neighbor_check_lock.length)
- neighbor_check_lock.length = neighbor_check_lock.length + 1;
- }
- //find the last held lock starting at the point after lock
- uint64_t endpoint = lock.start;
- if (lock.length) {
- endpoint += lock.length;
- } else {
- endpoint = uint64_t(-1); // max offset
- }
- multimap<uint64_t, ceph_filelock>::iterator iter =
- get_last_before(endpoint, held_locks);
- bool cont = iter != held_locks.end();
- while(cont) {
- if (share_space(iter, lock)) {
- overlaps.push_front(iter);
- } else if (self_neighbors &&
- (neighbor_check_lock.client == iter->second.client) &&
- (neighbor_check_lock.pid == iter->second.pid) &&
- share_space(iter, neighbor_check_lock)) {
- self_neighbors->push_front(iter);
- }
- if ((iter->first < lock.start) && (CEPH_LOCK_EXCL == iter->second.type)) {
- //can't be any more overlapping locks or they'd interfere with this one
- cont = false;
- } else if (held_locks.begin() == iter) cont = false;
- else --iter;
- }
- return !overlaps.empty();
- }
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> & overlaps,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> *self_neighbors);
+
bool get_overlapping_locks(ceph_filelock& lock,
list<multimap<uint64_t, ceph_filelock>::iterator>& overlaps) {
@@ -534,20 +165,8 @@ private:
* Returns: true if at least one waiting_lock overlaps
*/
bool get_waiting_overlaps(ceph_filelock& lock,
- list<multimap<uint64_t, ceph_filelock>::iterator>&
- overlaps) {
- dout(15) << "get_waiting_overlaps" << dendl;
- multimap<uint64_t, ceph_filelock>::iterator iter =
- get_last_before(lock.start + lock.length - 1, waiting_locks);
- bool cont = iter != waiting_locks.end();
- while(cont) {
- if (share_space(iter, lock)) overlaps.push_front(iter);
- if (waiting_locks.begin() == iter) cont = false;
- --iter;
- }
- return !overlaps.empty();
- }
-
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator>& overlaps);
/*
* split a list of locks up by whether they're owned by same
* process as given lock
@@ -557,38 +176,13 @@ private:
* owned_locks: an empty list, to be filled with the locks owned by owner
*/
void split_by_owner(ceph_filelock& owner,
- list<multimap<uint64_t, ceph_filelock>::iterator> & locks,
- list<multimap<uint64_t, ceph_filelock>::iterator> & owned_locks) {
- list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = locks.begin();
- dout(15) << "owner lock: " << owner << dendl;
- while (iter != locks.end()) {
- dout(15) << "comparing to " << (*iter)->second << dendl;
- if ((*iter)->second.client == owner.client &&
- (*iter)->second.pid_namespace == owner.pid_namespace &&
- (*iter)->second.pid == owner.pid) {
- dout(15) << "success, pushing to owned_locks" << dendl;
- owned_locks.push_back(*iter);
- iter = locks.erase(iter);
- } else {
- dout(15) << "failure, something not equal in this group "
- << (*iter)->second.client << ":" << owner.client << ","
- << (*iter)->second.pid_namespace << ":" << owner.pid_namespace
- << "," << (*iter)->second.pid << ":" << owner.pid << dendl;
- ++iter;
- }
- }
- }
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> & locks,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> & owned_locks);
- ceph_filelock *contains_exclusive_lock(list<multimap<uint64_t, ceph_filelock>::iterator>& locks) {
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = locks.begin();
- iter != locks.end();
- ++iter) {
- if (CEPH_LOCK_EXCL == (*iter)->second.type) return &(*iter)->second;
- }
- return NULL;
- }
+ ceph_filelock *contains_exclusive_lock(list<multimap<uint64_t,
+ ceph_filelock>::iterator>& locks);
public:
void encode(bufferlist& bl) const {
diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h
index 70f029740d0..32f597277a1 100644
--- a/src/mds/mdstypes.h
+++ b/src/mds/mdstypes.h
@@ -430,6 +430,7 @@ struct inode_t {
bool is_truncating() const { return (truncate_pending > 0); }
void truncate(uint64_t old_size, uint64_t new_size) {
+ assert(new_size < old_size);
truncate_from = old_size;
size = new_size;
rstat.rbytes = new_size;
diff --git a/src/mkcephfs.in b/src/mkcephfs.in
index d4d5092bf93..fa43155c173 100644
--- a/src/mkcephfs.in
+++ b/src/mkcephfs.in
@@ -437,7 +437,7 @@ if [ $allhosts -eq 1 ]; then
do_root_cmd "$0 -d $rdir --prepare-osdfs $name"
fi
- do_cmd "$0 -d $rdir --init-daemon $name"
+ do_root_cmd "$0 -d $rdir --init-daemon $name"
# collect the key
if [ -n "$ssh" ]; then
@@ -468,7 +468,7 @@ if [ $allhosts -eq 1 ]; then
rdir=$dir
fi
- do_cmd "$0 -d $rdir --init-daemon $name"
+ do_root_cmd "$0 -d $rdir --init-daemon $name"
done
# admin keyring
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 6db2324eebd..abb878dea9d 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1664,16 +1664,21 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
i = atoi(m->cmd[2].c_str());
if (i < 0 || i >= osdmap.get_max_osd()) {
ss << i << " is not a valid osd id";
- getline(ss, rs);
- return -ERANGE;
+ err = -ERANGE;
+ goto out;
+ }
+ if (osdmap.exists(i)) {
+ ss << i << " already exists";
+ err = -EEXIST;
+ goto out;
}
- if (osdmap.exists(i) ||
- pending_inc.new_up_client.count(i) ||
+ if (pending_inc.new_up_client.count(i) ||
(pending_inc.new_state.count(i) &&
(pending_inc.new_state[i] & CEPH_OSD_EXISTS))) {
ss << i << " already exists";
getline(ss, rs);
- return -EEXIST;
+ paxos->wait_for_commit(new Monitor::C_Command(mon, m, -EEXIST, rs, paxos->get_version()));
+ return true;
}
} else {
// allocate a new id
diff --git a/src/os/CollectionIndex.h b/src/os/CollectionIndex.h
index f8846a78cd6..2fca26fb0a6 100644
--- a/src/os/CollectionIndex.h
+++ b/src/os/CollectionIndex.h
@@ -58,6 +58,9 @@ protected:
/// Type of returned paths
typedef std::tr1::shared_ptr<Path> IndexedPath;
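+  /// On-disk collection index format tags; IndexManager::build_index() uses these to pick an index implementation.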
+ static const uint32_t FLAT_INDEX_TAG = 0;
+ static const uint32_t HASH_INDEX_TAG = 1;
+ static const uint32_t HASH_INDEX_TAG_2 = 2;
/**
* For tracking Filestore collection versions.
*
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 9d3ebef3c75..86281ab9982 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -1268,7 +1268,7 @@ int FileStore::version_stamp_is_valid(uint32_t *version)
}
bufferptr bp(PATH_MAX);
int ret = safe_read(fd, bp.c_str(), bp.length());
- TEMP_FAILURE_RETRY(::close(op_fd));
+ TEMP_FAILURE_RETRY(::close(fd));
if (ret < 0)
return -errno;
bufferlist bl;
@@ -1292,7 +1292,7 @@ int FileStore::write_version_stamp()
::encode(on_disk_version, bl);
int ret = safe_write(fd, bl.c_str(), bl.length());
- TEMP_FAILURE_RETRY(::close(op_fd));
+ TEMP_FAILURE_RETRY(::close(fd));
if (ret < 0)
return -errno;
return 0;
@@ -2985,7 +2985,11 @@ void FileStore::sync_entry()
sync_epoch++;
dout(15) << "sync_entry committing " << cp << " sync_epoch " << sync_epoch << dendl;
- write_op_seq(op_fd, cp);
+ if (write_op_seq(op_fd, cp) < 0) {
+ derr << "Error: " << cpp_strerror(errno)
+ << " during write_op_seq" << dendl;
+ assert(0);
+ }
bool do_snap = btrfs && g_conf->filestore_btrfs_snap;
diff --git a/src/os/FileStore.h b/src/os/FileStore.h
index 7f01c897407..d17d5e6b82f 100644
--- a/src/os/FileStore.h
+++ b/src/os/FileStore.h
@@ -39,7 +39,7 @@ using namespace __gnu_cxx;
// fake attributes in memory, if we need to.
class FileStore : public JournalingObjectStore {
- static const uint32_t on_disk_version = 1;
+ static const uint32_t on_disk_version = 2;
string basedir, journalpath;
std::string current_fn;
std::string current_op_seq_fn;
diff --git a/src/os/FlatIndex.h b/src/os/FlatIndex.h
index d94edf3f69e..53e27f5ec08 100644
--- a/src/os/FlatIndex.h
+++ b/src/os/FlatIndex.h
@@ -35,7 +35,7 @@ public:
FlatIndex(string base_path) : base_path(base_path) {}
/// @see CollectionIndex
- uint32_t collection_version() { return 0; }
+ uint32_t collection_version() { return FLAT_INDEX_TAG; }
/// @see CollectionIndex
void set_ref(std::tr1::shared_ptr<CollectionIndex> ref);
diff --git a/src/os/HashIndex.h b/src/os/HashIndex.h
index fb8fde599ab..bbcbc9904ad 100644
--- a/src/os/HashIndex.h
+++ b/src/os/HashIndex.h
@@ -128,12 +128,13 @@ public:
HashIndex(
const char *base_path, ///< [in] Path to the index root.
int merge_at, ///< [in] Merge threshhold.
- int split_at) ///< [in] Split threshhold.
- : LFNIndex(base_path), merge_threshold(merge_at),
+ int split_at, ///< [in] Split threshold.
+ uint32_t index_version)///< [in] Index version
+ : LFNIndex(base_path, index_version), merge_threshold(merge_at),
split_threshold(split_at) {}
/// @see CollectionIndex
- uint32_t collection_version() { return 1; }
+ uint32_t collection_version() { return index_version; }
/// @see CollectionIndex
int cleanup();
diff --git a/src/os/IndexManager.cc b/src/os/IndexManager.cc
index 251599b1175..504cd8f5217 100644
--- a/src/os/IndexManager.cc
+++ b/src/os/IndexManager.cc
@@ -70,7 +70,8 @@ int IndexManager::init_index(coll_t c, const char *path, uint32_t version) {
if (r < 0)
return r;
HashIndex index(path, g_conf->filestore_merge_threshold,
- g_conf->filestore_split_multiple);
+ g_conf->filestore_split_multiple,
+ CollectionIndex::HASH_INDEX_TAG_2);
return index.init();
}
@@ -84,15 +85,16 @@ int IndexManager::build_index(coll_t c, const char *path, Index *index) {
return r;
switch (version) {
- case 0: {
+ case CollectionIndex::FLAT_INDEX_TAG: {
*index = Index(new FlatIndex(path),
RemoveOnDelete(c, this));
return 0;
}
- case 1: {
+ case CollectionIndex::HASH_INDEX_TAG: // fall through
+ case CollectionIndex::HASH_INDEX_TAG_2: {
// Must be a HashIndex
*index = Index(new HashIndex(path, g_conf->filestore_merge_threshold,
- g_conf->filestore_split_multiple),
+ g_conf->filestore_split_multiple, version),
RemoveOnDelete(c, this));
return 0;
}
@@ -102,7 +104,8 @@ int IndexManager::build_index(coll_t c, const char *path, Index *index) {
} else {
// No need to check
*index = Index(new HashIndex(path, g_conf->filestore_merge_threshold,
- g_conf->filestore_split_multiple),
+ g_conf->filestore_split_multiple,
+ CollectionIndex::HASH_INDEX_TAG_2),
RemoveOnDelete(c, this));
return 0;
}
diff --git a/src/os/LFNIndex.cc b/src/os/LFNIndex.cc
index d6903032e3c..557b69a564d 100644
--- a/src/os/LFNIndex.cc
+++ b/src/os/LFNIndex.cc
@@ -256,6 +256,23 @@ int LFNIndex::get_mangled_name(const vector<string> &from,
return lfn_get_name(from, hoid, mangled_name, 0, exists);
}
+static int get_hobject_from_oinfo(const char *dir, const char *file,
+ hobject_t *o) {
+ char path[PATH_MAX];
+ bufferptr bp(PATH_MAX);
+ snprintf(path, sizeof(path), "%s/%s", dir, file);
+ // Hack, user.ceph._ is the attribute used to store the object info
+ int r = do_getxattr(path, "user.ceph._", bp.c_str(), bp.length());
+ if (r < 0)
+ return r;
+ bufferlist bl;
+ bl.push_back(bp);
+ object_info_t oi(bl);
+ *o = oi.soid;
+ return 0;
+}
+
+
int LFNIndex::list_objects(const vector<string> &to_list, int max_objs,
long *handle, map<string, hobject_t> *out) {
string to_list_path = get_full_path_subdir(to_list);
@@ -295,6 +312,9 @@ int LFNIndex::list_objects(const vector<string> &to_list, int max_objs,
if (!lfn_must_hash(long_name)) {
assert(long_name == short_name);
}
+ if (index_version == HASH_INDEX_TAG)
+ get_hobject_from_oinfo(to_list_path.c_str(), short_name.c_str(), &obj);
+
out->insert(pair<string, hobject_t>(short_name, obj));
++listed;
} else {
@@ -413,7 +433,7 @@ int LFNIndex::remove_attr_path(const vector<string> &path,
return do_removexattr(full_path.c_str(), mangled_attr_name.c_str());
}
-string LFNIndex::lfn_generate_object_name(const hobject_t &hoid) {
+string LFNIndex::lfn_generate_object_name_keyless(const hobject_t &hoid) {
char s[FILENAME_MAX_LEN];
char *end = s + sizeof(s);
char *t = s;
@@ -449,7 +469,55 @@ string LFNIndex::lfn_generate_object_name(const hobject_t &hoid) {
snprintf(t, end - t, "_%.*X", (int)(sizeof(hoid.hash)*2), hoid.hash);
return string(s);
- }
+}
+
+static void append_escaped(string::const_iterator begin,
+ string::const_iterator end,
+ string *out) {
+ for (string::const_iterator i = begin; i != end; ++i) {
+ if (*i == '\\') {
+ out->append("\\\\");
+ } else if (*i == '/') {
+ out->append("\\s");
+ } else if (*i == '_') {
+ out->append("\\u");
+ } else {
+ out->append(i, i+1);
+ }
+ }
+}
+
+string LFNIndex::lfn_generate_object_name(const hobject_t &hoid) {
+ if (index_version == HASH_INDEX_TAG)
+ return lfn_generate_object_name_keyless(hoid);
+
+ string full_name;
+ string::const_iterator i = hoid.oid.name.begin();
+ if (hoid.oid.name.substr(0, 4) == "DIR_") {
+ full_name.append("\\d");
+ i += 4;
+ } else if (hoid.oid.name[0] == '.') {
+ full_name.append("\\.");
+ ++i;
+ }
+ append_escaped(i, hoid.oid.name.end(), &full_name);
+ full_name.append("_");
+ append_escaped(hoid.key.begin(), hoid.key.end(), &full_name);
+ full_name.append("_");
+
+ char snap_with_hash[PATH_MAX];
+ char *t = snap_with_hash;
+ char *end = t + sizeof(snap_with_hash);
+ if (hoid.snap == CEPH_NOSNAP)
+ t += snprintf(t, end - t, "head");
+ else if (hoid.snap == CEPH_SNAPDIR)
+ t += snprintf(t, end - t, "snapdir");
+ else
+ t += snprintf(t, end - t, "%llx", (long long unsigned)hoid.snap);
+ snprintf(t, end - t, "_%.*X", (int)(sizeof(hoid.hash)*2), hoid.hash);
+ full_name += string(snap_with_hash);
+ return full_name;
+}
int LFNIndex::lfn_get_name(const vector<string> &path,
const hobject_t &hoid,
@@ -487,7 +555,7 @@ int LFNIndex::lfn_get_name(const vector<string> &path,
for ( ; ; ++i) {
candidate = lfn_get_short_name(hoid, i);
candidate_path = get_full_path(path, candidate);
- r = do_getxattr(candidate_path.c_str(), LFN_ATTR.c_str(), buf, sizeof(buf));
+ r = do_getxattr(candidate_path.c_str(), get_lfn_attr().c_str(), buf, sizeof(buf));
if (r < 0) {
if (errno != ENODATA && errno != ENOENT)
return -errno;
@@ -528,7 +596,7 @@ int LFNIndex::lfn_created(const vector<string> &path,
return 0;
string full_path = get_full_path(path, mangled_name);
string full_name = lfn_generate_object_name(hoid);
- return do_setxattr(full_path.c_str(), LFN_ATTR.c_str(),
+ return do_setxattr(full_path.c_str(), get_lfn_attr().c_str(),
full_name.c_str(), full_name.size());
}
@@ -585,15 +653,15 @@ int LFNIndex::lfn_unlink(const vector<string> &path,
}
int LFNIndex::lfn_translate(const vector<string> &path,
- const string &short_name,
- hobject_t *out) {
+ const string &short_name,
+ hobject_t *out) {
if (!lfn_is_hashed_filename(short_name)) {
return lfn_parse_object_name(short_name, out);
}
// Get lfn_attr
string full_path = get_full_path(path, short_name);
char attr[PATH_MAX];
- int r = do_getxattr(full_path.c_str(), LFN_ATTR.c_str(), attr, sizeof(attr) - 1);
+ int r = do_getxattr(full_path.c_str(), get_lfn_attr().c_str(), attr, sizeof(attr) - 1);
if (r < 0)
return -errno;
if (r < (int)sizeof(attr))
@@ -666,13 +734,90 @@ static int parse_object(const char *s, hobject_t& o)
return 0;
}
-bool LFNIndex::lfn_parse_object_name(const string &long_name, hobject_t *out) {
+bool LFNIndex::lfn_parse_object_name_keyless(const string &long_name, hobject_t *out) {
bool r = parse_object(long_name.c_str(), *out);
if (!r) return r;
string temp = lfn_generate_object_name(*out);
return r;
}
+static bool append_unescaped(string::const_iterator begin,
+ string::const_iterator end,
+ string *out) {
+ for (string::const_iterator i = begin; i != end; ++i) {
+ if (*i == '\\') {
+ ++i;
+ if (*i == '\\')
+ out->append("\\");
+ else if (*i == 's')
+ out->append("/");
+ else if (*i == 'u')
+ out->append("_");
+ else
+ return false;
+ } else {
+ out->append(i, i+1);
+ }
+ }
+ return true;
+}
+
+bool LFNIndex::lfn_parse_object_name(const string &long_name, hobject_t *out) {
+ if (index_version == HASH_INDEX_TAG)
+ return lfn_parse_object_name_keyless(long_name, out);
+
+ string::const_iterator current = long_name.begin();
+ if (*current == '\\') {
+ ++current;
+ if (current == long_name.end()) {
+ return false;
+ } else if (*current == 'd') {
+ out->oid.name.append("DIR_");
+ ++current;
+ } else if (*current == '.') {
+ out->oid.name.append(".");
+ ++current;
+ } else {
+ --current;
+ }
+ }
+
+ string::const_iterator end = current;
+ for ( ; end != long_name.end() && *end != '_'; ++end) ;
+ if (end == long_name.end())
+ return false;
+ if (!append_unescaped(current, end, &(out->oid.name)))
+ return false;
+
+ current = ++end;
+ for ( ; end != long_name.end() && *end != '_'; ++end) ;
+ if (end == long_name.end())
+ return false;
+ if (!append_unescaped(current, end, &(out->key)))
+ return false;
+
+ current = ++end;
+ for ( ; end != long_name.end() && *end != '_'; ++end) ;
+ if (end == long_name.end())
+ return false;
+ string snap(current, end);
+
+ current = ++end;
+ for ( ; end != long_name.end() && *end != '_'; ++end) ;
+ if (end != long_name.end())
+ return false;
+ string hash(current, end);
+
+ if (snap == "head")
+ out->snap = CEPH_NOSNAP;
+ else if (snap == "snapdir")
+ out->snap = CEPH_SNAPDIR;
+ else
+ out->snap = strtoull(snap.c_str(), NULL, 16);
+ sscanf(hash.c_str(), "%X", &out->hash);
+ return true;
+}
+
bool LFNIndex::lfn_is_hashed_filename(const string &name) {
if (name.size() < (unsigned)FILENAME_SHORT_LEN) {
return 0;
@@ -776,13 +921,13 @@ string LFNIndex::demangle_path_component(const string &component) {
}
int LFNIndex::decompose_full_path(const char *in, vector<string> *out,
- hobject_t *hoid, string *shortname) {
+ hobject_t *hoid, string *shortname) {
const char *beginning = in + get_base_path().size();
const char *end = beginning;
while (1) {
end++;
beginning = end++;
- for (; *end != '\0' && *end != '/'; ++end);
+ for ( ; *end != '\0' && *end != '/'; ++end) ;
if (*end != '\0') {
out->push_back(demangle_path_component(string(beginning, end - beginning)));
continue;
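
Note: with HASH_INDEX_TAG_2 the long object name gains a locator-key field and a reversible escaping scheme, so '_' can act as the field separator: name, key, snap ("head", "snapdir" or a hex snapid) and the hex hash are joined with '_', and inside each field a backslash is stored as "\\", '/' as "\s" and '_' as "\u", while a leading "DIR_" or '.' on the object name is marked with "\d" or "\.". A minimal sketch of the per-field escape, mirroring append_escaped above (escape_component is an illustrative name):

    static void escape_component(const std::string &in, std::string *out) {
      for (std::string::const_iterator i = in.begin(); i != in.end(); ++i) {
        if (*i == '\\')      out->append("\\\\"); // escape the escape character
        else if (*i == '/')  out->append("\\s");  // '/' is not legal in a filename
        else if (*i == '_')  out->append("\\u");  // keep '_' free as a separator
        else                 out->append(1, *i);
      }
    }
    // e.g. object "rb.0.1_foo/bar" with key "k_1" at the head snapshot becomes
    // "rb.0.1\ufoo\sbar_k\u1_head_<hex hash>".
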
diff --git a/src/os/LFNIndex.h b/src/os/LFNIndex.h
index dedd8465a28..e0321b45dbf 100644
--- a/src/os/LFNIndex.h
+++ b/src/os/LFNIndex.h
@@ -76,10 +76,26 @@ class LFNIndex : public CollectionIndex {
/// For reference counting the collection @see Path
std::tr1::weak_ptr<CollectionIndex> self_ref;
+protected:
+ const uint32_t index_version;
+
+private:
+ string lfn_attribute;
+
public:
/// Constructor
- LFNIndex(const char *base_path) ///< [in] path to Index root
- : base_path(base_path) {}
+ LFNIndex(
+ const char *base_path, ///< [in] path to Index root
+ uint32_t index_version)
+ : base_path(base_path), index_version(index_version) {
+ if (index_version == HASH_INDEX_TAG) {
+ lfn_attribute = LFN_ATTR;
+ } else {
+ char buf[100];
+ snprintf(buf, sizeof(buf), "%d", index_version);
+ lfn_attribute = LFN_ATTR + string(buf);
+ }
+ }
/// Virtual destructor
virtual ~LFNIndex() {}
@@ -306,6 +322,14 @@ protected:
private:
/* lfn translation functions */
+
+ /**
+ * Gets the version specific lfn attribute tag
+ */
+ const string &get_lfn_attr() const {
+ return lfn_attribute;
+ }
+
/**
* Gets the filename corresponsing to hoid in path.
*
@@ -359,11 +383,22 @@ private:
); ///< @return True if short_name is a subdir, false otherwise
/// Generate object name
+ string lfn_generate_object_name_keyless(
+ const hobject_t &hoid ///< [in] Object for which to generate.
+ ); ///< @return Generated object name.
+
+ /// Generate object name
string lfn_generate_object_name(
const hobject_t &hoid ///< [in] Object for which to generate.
); ///< @return Generated object name.
/// Parse object name
+ bool lfn_parse_object_name_keyless(
+ const string &long_name, ///< [in] Name to parse
+ hobject_t *out ///< [out] Resulting Object
+ ); ///< @return True if successful, False otherwise.
+ ); ///< @return True if successful, False otherwise.
+
+ /// Parse object name
bool lfn_parse_object_name(
const string &long_name, ///< [in] Name to parse
hobject_t *out ///< [out] Resulting Object
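
Note: because the long-name format changed, the xattr used to store the full object name is now versioned as well; HASH_INDEX_TAG collections keep the bare LFN_ATTR name while newer versions get the numeric version appended, so entries written by different index versions never read each other's attribute. A minimal sketch of the selection done in the constructor above (lfn_attr_for is an illustrative helper; LFN_ATTR is the existing attribute-name constant):

    std::string lfn_attr_for(uint32_t index_version, const std::string &legacy) {
      if (index_version == CollectionIndex::HASH_INDEX_TAG)
        return legacy;            // keep the pre-existing attribute name
      char buf[32];
      snprintf(buf, sizeof(buf), "%d", index_version);
      return legacy + buf;        // newer versions: legacy name plus the numeric tag
    }
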
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 95adc3ff8ac..85d922cab11 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -193,16 +193,15 @@ static int do_convertfs(ObjectStore *store)
g_ceph_context->_conf->filestore_update_collections = true;
int r = store->mount();
if (r < 0)
- return -r;
+ return r;
uint32_t version;
r = store->version_stamp_is_valid(&version);
if (r < 0)
- return -r;
+ return r;
if (r == 1) {
derr << "FileStore is up to date." << dendl;
- store->umount();
- return 0;
+ return store->umount();
} else {
derr << "FileStore is old at version " << version << ". Updating..."
<< dendl;
@@ -212,7 +211,7 @@ static int do_convertfs(ObjectStore *store)
vector<coll_t> collections;
r = store->list_collections(collections);
if (r < 0)
- return -r;
+ return r;
derr << collections.size() << " to process." << dendl;
int processed = 0;
@@ -242,8 +241,7 @@ static int do_convertfs(ObjectStore *store)
store->sync_and_flush();
store->sync();
cerr << "Version stamp updated, done!" << std::endl;
- store->umount();
- return 0;
+ return store->umount();
}
int OSD::convertfs(const std::string &dev, const std::string &jdev)
@@ -5069,7 +5067,8 @@ void OSD::handle_op(MOSDOp *op)
if ((op->get_flags() & CEPH_OSD_FLAG_PGOP) == 0) {
// missing object?
- hobject_t head(op->get_oid(), CEPH_NOSNAP, op->get_pg().ps());
+ hobject_t head(op->get_oid(), op->get_object_locator().key,
+ CEPH_NOSNAP, op->get_pg().ps());
if (pg->is_missing_object(head)) {
pg->wait_for_missing_object(head, op);
pg->unlock();
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index c0f383b6e0f..d77639c2a3b 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -2250,6 +2250,7 @@ void PG::read_log(ObjectStore *store)
++i) {
if (i->oid == e.soid.oid && i->snap == e.soid.snap) {
e.soid.hash = i->hash;
+ e.soid.key = i->key;
found = true;
break;
}
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 61e8778fd14..d5f810d5130 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -393,7 +393,7 @@ public:
}
void encode(bufferlist &bl) const {
- __u8 struct_v = 2;
+ __u8 struct_v = 3;
::encode(struct_v, bl);
::encode(op, bl);
::encode(soid, bl);
@@ -417,6 +417,8 @@ public:
} else {
::decode(soid, bl);
}
+ if (struct_v < 3)
+ invalid_hash = true;
::decode(version, bl);
::decode(prior_version, bl);
::decode(reqid, bl);
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index b9a50f95936..700b02c01c3 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -404,7 +404,9 @@ void ReplicatedPG::do_op(MOSDOp *op)
ObjectContext *obc;
bool can_create = op->may_write();
snapid_t snapid;
- int r = find_object_context(hobject_t(op->get_oid(), op->get_snapid(), op->get_pg().ps()),
+ int r = find_object_context(hobject_t(op->get_oid(),
+ op->get_object_locator().key,
+ op->get_snapid(), op->get_pg().ps()),
op->get_object_locator(),
&obc, can_create, &snapid);
@@ -416,7 +418,8 @@ void ReplicatedPG::do_op(MOSDOp *op)
if (is_primary() || (!(op->get_rmw_flags() & CEPH_OSD_FLAG_LOCALIZE_READS))) {
// missing the specific snap we need; requeue and wait.
assert(!can_create); // only happens on a read
- hobject_t soid(op->get_oid(), snapid, op->get_pg().ps());
+ hobject_t soid(op->get_oid(), op->get_object_locator().key,
+ snapid, op->get_pg().ps());
wait_for_missing_object(soid, op);
return;
}
@@ -476,17 +479,19 @@ void ReplicatedPG::do_op(MOSDOp *op)
map<hobject_t,ObjectContext*> src_obc;
for (vector<OSDOp>::iterator p = op->ops.begin(); p != op->ops.end(); p++) {
OSDOp& osd_op = *p;
- hobject_t toid(osd_op.soid, op->get_pg().ps());
+ hobject_t toid(osd_op.soid, op->get_object_locator().key, op->get_pg().ps());
if (osd_op.soid.oid.name.length()) {
if (!src_obc.count(toid)) {
ObjectContext *sobc;
snapid_t ssnapid;
- int r = find_object_context(hobject_t(toid.oid, toid.snap, op->get_pg().ps()),
+ int r = find_object_context(hobject_t(toid.oid, op->get_object_locator().key,
+ toid.snap, op->get_pg().ps()),
op->get_object_locator(),
&sobc, false, &ssnapid);
if (r == -EAGAIN) {
// missing the specific snap we need; requeue and wait.
- hobject_t wait_oid(osd_op.soid.oid, ssnapid, op->get_pg().ps());
+ hobject_t wait_oid(osd_op.soid.oid, op->get_object_locator().key,
+ ssnapid, op->get_pg().ps());
wait_for_missing_object(wait_oid, op);
} else if (r) {
osd->reply_op_error(op, r);
@@ -816,7 +821,7 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid,
// load clone info
bufferlist bl;
ObjectContext *obc = 0;
- int r = find_object_context(hobject_t(coid.oid, sn, coid.hash),
+ int r = find_object_context(hobject_t(coid.oid, coid.key, sn, coid.hash),
OLOC_BLANK, &obc, false, NULL);
if (r == -ENOENT || coid.snap != obc->obs.oi.soid.snap) {
if (obc) put_object_context(obc);
@@ -829,7 +834,7 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid,
// get snap set context
if (!obc->ssc)
- obc->ssc = get_snapset_context(coid.oid, coid.hash, false);
+ obc->ssc = get_snapset_context(coid.oid, coid.key, coid.hash, false);
SnapSetContext *ssc = obc->ssc;
assert(ssc);
SnapSet& snapset = ssc->snapset;
@@ -926,7 +931,7 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid,
// save head snapset
dout(10) << coid << " new snapset " << snapset << dendl;
- hobject_t snapoid(coid.oid, snapset.head_exists ? CEPH_NOSNAP:CEPH_SNAPDIR, coid.hash);
+ hobject_t snapoid(coid.oid, coid.key, snapset.head_exists ? CEPH_NOSNAP:CEPH_SNAPDIR, coid.hash);
ctx->snapset_obc = get_object_context(snapoid, coi.oloc, false);
assert(ctx->snapset_obc->registered);
if (snapset.clones.empty() && !snapset.head_exists) {
@@ -1157,7 +1162,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops,
ObjectContext *src_obc = 0;
if (ceph_osd_op_type_multi(op.op)) {
- src_obc = ctx->src_obc[hobject_t(osd_op.soid, soid.hash)];
+ src_obc = ctx->src_obc[hobject_t(osd_op.soid, soid.key, soid.hash)];
assert(src_obc);
}
@@ -1678,8 +1683,11 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops,
result = -EINVAL;
break;
}
- t.clone_range(coll, hobject_t(osd_op.soid, obs.oi.soid.hash), obs.oi.soid,
- op.clonerange.src_offset, op.clonerange.length, op.clonerange.offset);
+ t.clone_range(coll, hobject_t(osd_op.soid, obs.oi.soid.key,
+ obs.oi.soid.hash),
+ obs.oi.soid, op.clonerange.src_offset,
+ op.clonerange.length, op.clonerange.offset);
+
write_update_size_and_usage(ctx->delta_stats, oi, ssc->snapset, ctx->modified_ranges,
op.clonerange.offset, op.clonerange.length, false);
@@ -2096,7 +2104,7 @@ int ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
dout(10) << "_rollback_to " << soid << " snapid " << snapid << dendl;
ObjectContext *rollback_to;
- int ret = find_object_context(hobject_t(soid.oid, snapid, soid.hash),
+ int ret = find_object_context(hobject_t(soid.oid, oi.oloc.key, snapid, soid.hash),
oi.oloc, &rollback_to, false, &cloneid);
if (ret) {
if (-ENOENT == ret) {
@@ -2109,7 +2117,7 @@ int ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
/* a different problem, like degraded pool
* with not-yet-restored object. We shouldn't have been able
* to get here; recovery should have completed first! */
- hobject_t rollback_target(soid.oid, cloneid, soid.hash);
+ hobject_t rollback_target(soid.oid, soid.key, cloneid, soid.hash);
assert(is_missing_object(rollback_target));
dout(20) << "_rollback_to attempted to roll back to a missing object "
<< rollback_target << " (requested snapid: ) " << snapid << dendl;
@@ -2546,7 +2554,7 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
ctx->op_t.setattr(coll, soid, SS_ATTR, bss);
if (!head_existed) {
// if we logically recreated the head, remove old _snapdir object
- hobject_t snapoid(soid.oid, CEPH_SNAPDIR, soid.hash);
+ hobject_t snapoid(soid.oid, soid.key, CEPH_SNAPDIR, soid.hash);
ctx->snapset_obc = get_object_context(snapoid, ctx->new_obs.oi.oloc, false);
if (ctx->snapset_obc && ctx->snapset_obc->obs.exists) {
@@ -2563,7 +2571,7 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
}
} else if (ctx->new_snapset.clones.size()) {
// save snapset on _snap
- hobject_t snapoid(soid.oid, CEPH_SNAPDIR, soid.hash);
+ hobject_t snapoid(soid.oid, soid.key, CEPH_SNAPDIR, soid.hash);
dout(10) << " final snapset " << ctx->new_snapset
<< " in " << snapoid << dendl;
ctx->at_version.version++;
@@ -2809,7 +2817,8 @@ void ReplicatedPG::eval_repop(RepGather *repop)
reply = new MOSDOpReply(op, 0, osd->osdmap->get_epoch(), 0);
reply->add_flags(CEPH_OSD_FLAG_ACK);
dout(10) << " sending ack on " << *repop << " " << reply << dendl;
- osd->cluster_messenger->send_message(reply, op->get_connection());
+ assert(entity_name_t::TYPE_OSD != op->get_connection()->peer_type);
+ osd->client_messenger->send_message(reply, op->get_connection());
repop->sent_ack = true;
}
@@ -3017,13 +3026,13 @@ ReplicatedPG::ObjectContext *ReplicatedPG::get_object_context(const hobject_t& s
SnapSetContext *ssc = NULL;
if (can_create)
- ssc = get_snapset_context(soid.oid, soid.hash, true);
+ ssc = get_snapset_context(soid.oid, soid.key, soid.hash, true);
obc = new ObjectContext(oi, true, ssc);
}
register_object_context(obc);
if (can_create && !obc->ssc)
- obc->ssc = get_snapset_context(soid.oid, soid.hash, true);
+ obc->ssc = get_snapset_context(soid.oid, soid.key, soid.hash, true);
if (r >= 0) {
obc->obs.oi.decode(bv);
@@ -3058,7 +3067,7 @@ int ReplicatedPG::find_object_context(const hobject_t& oid,
snapid_t *psnapid)
{
// want the head?
- hobject_t head(oid.oid, CEPH_NOSNAP, oid.hash);
+ hobject_t head(oid.oid, oid.key, CEPH_NOSNAP, oid.hash);
if (oid.snap == CEPH_NOSNAP) {
ObjectContext *obc = get_object_context(head, oloc, can_create);
if (!obc)
@@ -3067,13 +3076,13 @@ int ReplicatedPG::find_object_context(const hobject_t& oid,
*pobc = obc;
if (can_create && !obc->ssc)
- obc->ssc = get_snapset_context(oid.oid, oid.hash, true);
+ obc->ssc = get_snapset_context(oid.oid, oid.key, oid.hash, true);
return 0;
}
// we want a snap
- SnapSetContext *ssc = get_snapset_context(oid.oid, oid.hash, can_create);
+ SnapSetContext *ssc = get_snapset_context(oid.oid, oid.key, oid.hash, can_create);
if (!ssc)
return -ENOENT;
@@ -3115,7 +3124,7 @@ int ReplicatedPG::find_object_context(const hobject_t& oid,
put_snapset_context(ssc);
return -ENOENT;
}
- hobject_t soid(oid.oid, ssc->snapset.clones[k], oid.hash);
+ hobject_t soid(oid.oid, oid.key, ssc->snapset.clones[k], oid.hash);
put_snapset_context(ssc); // we're done with ssc
ssc = 0;
@@ -3179,7 +3188,9 @@ void ReplicatedPG::put_object_contexts(map<hobject_t,ObjectContext*>& obcv)
obcv.clear();
}
-ReplicatedPG::SnapSetContext *ReplicatedPG::get_snapset_context(const object_t& oid, ps_t seed,
+ReplicatedPG::SnapSetContext *ReplicatedPG::get_snapset_context(const object_t& oid,
+ const string& key,
+ ps_t seed,
bool can_create)
{
SnapSetContext *ssc;
@@ -3188,11 +3199,11 @@ ReplicatedPG::SnapSetContext *ReplicatedPG::get_snapset_context(const object_t&
ssc = p->second;
} else {
bufferlist bv;
- hobject_t head(oid, CEPH_NOSNAP, seed);
+ hobject_t head(oid, key, CEPH_NOSNAP, seed);
int r = osd->store->getattr(coll, head, SS_ATTR, bv);
if (r < 0) {
// try _snapset
- hobject_t snapdir(oid, CEPH_SNAPDIR, seed);
+ hobject_t snapdir(oid, key, CEPH_SNAPDIR, seed);
r = osd->store->getattr(coll, snapdir, SS_ATTR, bv);
if (r < 0 && !can_create)
return NULL;
@@ -3602,7 +3613,7 @@ int ReplicatedPG::pull(const hobject_t& soid)
}
// check snapset
- SnapSetContext *ssc = get_snapset_context(soid.oid, soid.hash, false);
+ SnapSetContext *ssc = get_snapset_context(soid.oid, soid.key, soid.hash, false);
dout(10) << " snapset " << ssc->snapset << dendl;
calc_clone_subsets(ssc->snapset, soid, missing,
data_subset, clone_subsets);
@@ -3710,7 +3721,7 @@ void ReplicatedPG::push_to_replica(ObjectContext *obc, const hobject_t& soid, in
return push_start(snapdir, peer);
}
- SnapSetContext *ssc = get_snapset_context(soid.oid, soid.hash, false);
+ SnapSetContext *ssc = get_snapset_context(soid.oid, soid.key, soid.hash, false);
dout(15) << "push_to_replica snapset is " << ssc->snapset << dendl;
calc_clone_subsets(ssc->snapset, soid, peer_missing[peer],
data_subset, clone_subsets);
@@ -3718,7 +3729,7 @@ void ReplicatedPG::push_to_replica(ObjectContext *obc, const hobject_t& soid, in
} else if (soid.snap == CEPH_NOSNAP) {
// pushing head or unversioned object.
// base this on partially on replica's clones?
- SnapSetContext *ssc = get_snapset_context(soid.oid, soid.hash, false);
+ SnapSetContext *ssc = get_snapset_context(soid.oid, soid.key, soid.hash, false);
dout(15) << "push_to_replica snapset is " << ssc->snapset << dendl;
calc_head_subsets(ssc->snapset, soid, peer_missing[peer], data_subset, clone_subsets);
put_snapset_context(ssc);
@@ -4062,7 +4073,7 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op)
if (soid.snap && soid.snap < CEPH_NOSNAP) {
// clone. make sure we have enough data.
- SnapSetContext *ssc = get_snapset_context(soid.oid, soid.hash, false);
+ SnapSetContext *ssc = get_snapset_context(soid.oid, soid.key, soid.hash, false);
assert(ssc);
clone_subsets.clear(); // forget what pusher said; recalculate cloning.
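
Note: the recurring change in this file is that every hobject_t built from a request now carries the object locator key, matching the new hobject_t(oid, key, snap, hash) constructor, which the key-aware index naming above needs to generate and parse file names consistently. A minimal sketch of the construction pattern (make_head is an illustrative helper; the accessors are the ones used in the hunks above):

    hobject_t make_head(MOSDOp *op) {
      return hobject_t(op->get_oid(),
                       op->get_object_locator().key,  // previously dropped
                       CEPH_NOSNAP,
                       op->get_pg().ps());
    }
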
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index faadd3f03bf..f9295e4e13c 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -510,7 +510,8 @@ protected:
ObjectContext **pobc,
bool can_create, snapid_t *psnapid=NULL);
- SnapSetContext *get_snapset_context(const object_t& oid, ps_t seed, bool can_create);
+ SnapSetContext *get_snapset_context(const object_t& oid, const string &key,
+ ps_t seed, bool can_create);
void register_snapset_context(SnapSetContext *ssc) {
if (!ssc->registered) {
ssc->registered = true;
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 75a93ca73fd..2bdf099d0cf 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -574,7 +574,7 @@ ps_t object_info_t::legacy_object_locator_to_ps(const object_t &oid,
void object_info_t::encode(bufferlist& bl) const
{
- const __u8 v = 6;
+ const __u8 v = 7;
::encode(v, bl);
::encode(soid, bl);
::encode(oloc, bl);
@@ -603,12 +603,15 @@ void object_info_t::decode(bufferlist::iterator& bl)
sobject_t obj;
::decode(obj, bl);
::decode(oloc, bl);
- soid = hobject_t(obj.oid, obj.snap, 0);
+ soid = hobject_t(obj.oid, oloc.key, obj.snap, 0);
soid.hash = legacy_object_locator_to_ps(soid.oid, oloc);
} else if (v >= 6) {
::decode(soid, bl);
::decode(oloc, bl);
+ if (v == 6)
+ soid.key = oloc.key;
}
+
if (v >= 5)
::decode(category, bl);
::decode(version, bl);
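
Note: object_info_t bumps its encode version to 7 and, on decode, backfills the per-object key from the locator for records written at v6, so existing stores keep working after the hobject_t key was introduced. A minimal sketch of the compatibility branch, using the same field names as the hunk above:

    if (v >= 6) {
      ::decode(soid, bl);
      ::decode(oloc, bl);
      if (v == 6)
        soid.key = oloc.key;   // older records kept the key only in the locator
    }
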
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 19ef1224b57..a4dad0ddf1a 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -632,7 +632,6 @@ void ObjectCacher::bh_write_commit(int64_t poolid, sobject_t oid, loff_t start,
<< dendl;
if (objects[poolid].count(oid) == 0) {
ldout(cct, 7) << "bh_write_commit no object cache" << dendl;
- assert(0);
} else {
Object *ob = objects[poolid][oid];
@@ -642,7 +641,8 @@ void ObjectCacher::bh_write_commit(int64_t poolid, sobject_t oid, loff_t start,
p++) {
BufferHead *bh = p->second;
- if (bh->start() > start+(loff_t)length) break;
+ if (bh->start() > start+(loff_t)length)
+ break;
if (bh->start() < start &&
bh->end() > start+(loff_t)length) {
diff --git a/src/rados.cc b/src/rados.cc
index 39cc1e2f1a2..8b43bb08375 100644
--- a/src/rados.cc
+++ b/src/rados.cc
@@ -753,6 +753,7 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts,
"KB", "objects", "clones", "degraded",
"unfound", "rd", "rd KB", "wr", "wr KB");
} else {
+ formatter->open_object_section("stats");
formatter->open_array_section("pools");
}
for (map<string, librados::stats_map>::iterator c = stats.begin(); c != stats.end(); ++c) {
@@ -825,6 +826,7 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts,
printf(" total avail %12lld\n", (long long unsigned)tstats.kb_avail);
printf(" total space %12lld\n", (long long unsigned)tstats.kb);
} else {
+ formatter->close_section();
formatter->dump_format("total_objects", "%lld", (long long unsigned)tstats.num_objects);
formatter->dump_format("total_used", "%lld", (long long unsigned)tstats.kb_used);
formatter->dump_format("total_avail", "%lld", (long long unsigned)tstats.kb_avail);
diff --git a/src/rgw/rgw_access.h b/src/rgw/rgw_access.h
index 63e199df168..9a7a19623a7 100644
--- a/src/rgw/rgw_access.h
+++ b/src/rgw/rgw_access.h
@@ -232,7 +232,7 @@ public:
virtual bool supports_tmap() { return false; }
- virtual int tmap_get(rgw_obj& obj, bufferlist& bl) { return -ENOTSUP; }
+ virtual int tmap_get(rgw_obj& obj, bufferlist& header, std::map<string, bufferlist>& m) { return -ENOTSUP; }
virtual int tmap_set(rgw_obj& obj, std::string& key, bufferlist& bl) { return -ENOTSUP; }
virtual int tmap_set(rgw_obj& obj, map<std::string, bufferlist>& m) { return -ENOTSUP; }
virtual int tmap_create(rgw_obj& obj, std::string& key, bufferlist& bl) { return -ENOTSUP; }
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 7ed07881956..dd6d31f9f85 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -57,11 +57,11 @@ void _usage()
cerr << " --uid=<id> user id\n";
cerr << " --subuser=<name> subuser name\n";
cerr << " --access-key=<key> S3 access key\n";
- cerr << " --os-user=<group:name> OpenStack user\n";
+ cerr << " --swift-user=<group:name> Swift user\n";
cerr << " --email=<email>\n";
cerr << " --auth_uid=<auid> librados uid\n";
cerr << " --secret=<key> S3 key\n";
- cerr << " --os-secret=<key> OpenStack key\n";
+ cerr << " --swift-secret=<key> Swift key\n";
cerr << " --gen-access-key generate random access key\n";
cerr << " --gen-secret generate random secret key\n";
cerr << " --access=<access> Set access permissions for sub-user, should be one\n";
@@ -266,32 +266,76 @@ string escape_str(string& src, char c)
return dest;
}
-static void show_user_info( RGWUserInfo& info)
+static void show_user_info(RGWUserInfo& info, const char *format, Formatter *formatter)
{
map<string, RGWAccessKey>::iterator kiter;
map<string, RGWSubUser>::iterator uiter;
- cout << "User ID: " << info.user_id << std::endl;
- cout << "RADOS UID: " << info.auid << std::endl;
- cout << "Keys:" << std::endl;
- for (kiter = info.access_keys.begin(); kiter != info.access_keys.end(); ++kiter) {
- RGWAccessKey& k = kiter->second;
- cout << " User: " << info.user_id << (k.subuser.empty() ? "" : ":") << k.subuser << std::endl;
- cout << " Access Key: " << k.id << std::endl;
- cout << " Secret Key: " << k.key << std::endl;
- }
- cout << "Users: " << std::endl;
- for (uiter = info.subusers.begin(); uiter != info.subusers.end(); ++uiter) {
- RGWSubUser& u = uiter->second;
- cout << " Name: " << info.user_id << ":" << u.name << std::endl;
- char buf[256];
- perm_to_str(u.perm_mask, buf, sizeof(buf));
- cout << " Permissions: " << buf << std::endl;
+
+ if (!format) {
+ cout << "User ID: " << info.user_id << std::endl;
+ cout << "RADOS UID: " << info.auid << std::endl;
+ cout << "Keys:" << std::endl;
+ for (kiter = info.access_keys.begin(); kiter != info.access_keys.end(); ++kiter) {
+ RGWAccessKey& k = kiter->second;
+ cout << " User: " << info.user_id << (k.subuser.empty() ? "" : ":") << k.subuser << std::endl;
+ cout << " Access Key: " << k.id << std::endl;
+ cout << " Secret Key: " << k.key << std::endl;
+ }
+ cout << "Users: " << std::endl;
+ for (uiter = info.subusers.begin(); uiter != info.subusers.end(); ++uiter) {
+ RGWSubUser& u = uiter->second;
+ cout << " Name: " << info.user_id << ":" << u.name << std::endl;
+ char buf[256];
+ perm_to_str(u.perm_mask, buf, sizeof(buf));
+ cout << " Permissions: " << buf << std::endl;
+ }
+ cout << "Display Name: " << info.display_name << std::endl;
+ cout << "Email: " << info.user_email << std::endl;
+ cout << "Swift User: " << (info.swift_name.size() ? info.swift_name : "<undefined>")<< std::endl;
+ cout << "Swift Key: " << (info.swift_key.size() ? info.swift_key : "<undefined>")<< std::endl;
+ } else {
+ formatter->open_object_section("user_info");
+
+ formatter->dump_string("user_id", info.user_id.c_str());
+ formatter->dump_format("rados_uid", "%lld", info.auid);
+ formatter->dump_string("display_name", info.display_name.c_str());
+ formatter->dump_string("email", info.user_email.c_str());
+ formatter->dump_string("swift_user", info.swift_name.c_str());
+ formatter->dump_string("swift_key", info.swift_key.c_str());
+
+ // keys
+ formatter->open_array_section("keys");
+ for (kiter = info.access_keys.begin(); kiter != info.access_keys.end(); ++kiter) {
+ RGWAccessKey& k = kiter->second;
+ const char *sep = (k.subuser.empty() ? "" : ":");
+ const char *subuser = (k.subuser.empty() ? "" : k.subuser.c_str());
+ formatter->open_object_section("key");
+ formatter->dump_format("user", "%s%s%s", info.user_id.c_str(), sep, subuser);
+ formatter->dump_string("access_key", k.id);
+ formatter->dump_string("secret_key", k.key);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ formatter->close_section();
+
+ // subusers
+ formatter->open_array_section("subusers");
+ for (uiter = info.subusers.begin(); uiter != info.subusers.end(); ++uiter) {
+ RGWSubUser& u = uiter->second;
+ formatter->open_object_section("user");
+ formatter->dump_format("id", "%s:%s", info.user_id.c_str(), u.name.c_str());
+ char buf[256];
+ perm_to_str(u.perm_mask, buf, sizeof(buf));
+ formatter->dump_string("permissions", buf);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ formatter->close_section();
+
+ formatter->close_section();
+ formatter->flush(cout);
}
- cout << "Display Name: " << info.display_name << std::endl;
- cout << "Email: " << info.user_email << std::endl;
- cout << "OpenStack User: " << (info.openstack_name.size() ? info.openstack_name : "<undefined>")<< std::endl;
- cout << "OpenStack Key: " << (info.openstack_key.size() ? info.openstack_key : "<undefined>")<< std::endl;
}
static int create_bucket(string bucket_str, string& user_id, string& display_name, uint64_t auid)
@@ -358,11 +402,11 @@ static void remove_old_indexes(RGWUserInfo& old_info, RGWUserInfo new_info)
}
}
- if (!old_info.openstack_name.empty() &&
- old_info.openstack_name.compare(new_info.openstack_name) != 0) {
- ret = rgw_remove_openstack_name_index(new_info.user_id, old_info.openstack_name);
+ if (!old_info.swift_name.empty() &&
+ old_info.swift_name.compare(new_info.swift_name) != 0) {
+ ret = rgw_remove_swift_name_index(new_info.user_id, old_info.swift_name);
if (ret < 0 && ret != -ENOENT) {
- cerr << "ERROR: could not remove index for openstack_name " << old_info.openstack_name << " return code: " << ret << std::endl;
+ cerr << "ERROR: could not remove index for swift_name " << old_info.swift_name << " return code: " << ret << std::endl;
success = false;
}
}
@@ -421,7 +465,12 @@ int process_intent_log(rgw_bucket& bucket, string& oid, time_t epoch, int flags,
try {
while (!iter.end()) {
struct rgw_intent_log_entry entry;
- ::decode(entry, iter);
+ try {
+ ::decode(entry, iter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: " << __func__ << "(): caught buffer::error" << dendl;
+ return -EIO;
+ }
if (entry.op_time.sec() > epoch) {
cerr << "skipping entry for obj=" << obj << " entry.op_time=" << entry.op_time.sec() << " requested epoch=" << epoch << std::endl;
cerr << "skipping intent log" << std::endl; // no use to continue
@@ -484,7 +533,7 @@ int main(int argc, char **argv)
common_init_finish(g_ceph_context);
std::string user_id, access_key, secret_key, user_email, display_name;
- std::string bucket_name, object, openstack_user, openstack_key;
+ std::string bucket_name, object, swift_user, swift_key;
std::string date, time, subuser, access, format;
rgw_bucket bucket;
uint32_t perm_mask = 0;
@@ -540,9 +589,9 @@ int main(int argc, char **argv)
}
auid = tmp;
} else if (ceph_argparse_witharg(args, i, &val, "--os-user", (char*)NULL)) {
- openstack_user = val;
+ swift_user = val;
} else if (ceph_argparse_witharg(args, i, &val, "--os-secret", (char*)NULL)) {
- openstack_key = val;
+ swift_key = val;
} else if (ceph_argparse_witharg(args, i, &val, "--date", (char*)NULL)) {
date = val;
} else if (ceph_argparse_witharg(args, i, &val, "--time", (char*)NULL)) {
@@ -654,12 +703,12 @@ int main(int argc, char **argv)
cerr << "could not find user by specified access key" << std::endl;
}
}
- if (!found && (!openstack_user.empty())) {
- s = openstack_user;
- if (rgw_get_user_info_by_openstack(s, info) >= 0) {
+ if (!found && (!swift_user.empty())) {
+ s = swift_user;
+ if (rgw_get_user_info_by_swift(s, info) >= 0) {
found = true;
} else
- cerr << "could not find user by specified openstack username" << std::endl;
+ cerr << "could not find user by specified swift username" << std::endl;
}
if (found)
user_id = info.user_id.c_str();
@@ -799,10 +848,10 @@ int main(int argc, char **argv)
info.user_email = user_email;
if (auid != (uint64_t)-1)
info.auid = auid;
- if (!openstack_user.empty())
- info.openstack_name = openstack_user;
- if (!openstack_key.empty())
- info.openstack_key = openstack_key;
+ if (!swift_user.empty())
+ info.swift_name = swift_user;
+ if (!swift_key.empty())
+ info.swift_key = swift_key;
if (!subuser.empty()) {
RGWSubUser u;
u.name = subuser;
@@ -817,7 +866,7 @@ int main(int argc, char **argv)
remove_old_indexes(old_info, info);
- show_user_info(info);
+ show_user_info(info, format.c_str(), formatter);
break;
case OPT_SUBUSER_RM:
@@ -828,7 +877,7 @@ int main(int argc, char **argv)
cerr << "error storing user info: " << cpp_strerror(-err) << std::endl;
break;
}
- show_user_info(info);
+ show_user_info(info, format.c_str(), formatter);
break;
case OPT_KEY_RM:
@@ -843,11 +892,11 @@ int main(int argc, char **argv)
break;
}
}
- show_user_info(info);
+ show_user_info(info, format.c_str(), formatter);
break;
case OPT_USER_INFO:
- show_user_info(info);
+ show_user_info(info, format.c_str(), formatter);
break;
}
@@ -859,7 +908,12 @@ int main(int argc, char **argv)
RGWAccessControlPolicy policy;
if (ret >= 0) {
bufferlist::iterator iter = bl.begin();
- policy.decode(iter);
+ try {
+ policy.decode(iter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: caught buffer::error, could not decode policy" << dendl;
+ return -EIO;
+ }
policy.to_xml(cout);
cout << std::endl;
}
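
Note: radosgw-admin user output can now be rendered either as the original plain text or through a Formatter (the format string is threaded in from the command line), with keys and subusers emitted as arrays of small objects. A minimal sketch of the per-key dump using the Formatter calls from the hunk above (dump_key is an illustrative helper):

    void dump_key(Formatter *f, const std::string &user, const RGWAccessKey &k) {
      f->open_object_section("key");
      f->dump_format("user", "%s%s%s", user.c_str(),
                     k.subuser.empty() ? "" : ":",
                     k.subuser.empty() ? "" : k.subuser.c_str());
      f->dump_string("access_key", k.id);
      f->dump_string("secret_key", k.key);
      f->close_section();
      f->flush(std::cout);
    }
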
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
index a1c440fee3b..94eebd8f045 100644
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -15,8 +15,6 @@ static rgw_bucket pi_buckets(BUCKETS_POOL_NAME);
static string avail_pools = ".pools.avail";
static string pool_name_prefix = "p";
-#define POOLS_PREALLOCATE_NUM 100
-
int rgw_store_bucket_info(string& bucket_name, RGWBucketInfo& info)
{
@@ -49,7 +47,12 @@ int rgw_get_bucket_info(string& bucket_name, RGWBucketInfo& info)
}
bufferlist::iterator iter = bl.begin();
- ::decode(info, iter);
+ try {
+ ::decode(info, iter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: could not decode buffer info, caught buffer::error" << dendl;
+ return -EIO;
+ }
RGW_LOG(0) << "rgw_get_bucket_info: bucket=" << info.bucket << dendl;
@@ -64,11 +67,11 @@ int rgw_remove_bucket_info(string& bucket_name)
return ret;
}
-static int generate_preallocated_pools(vector<string>& pools)
+static int generate_preallocated_pools(vector<string>& pools, int num)
{
vector<string> names;
- for (int i = 0; i < POOLS_PREALLOCATE_NUM; i++) {
+ for (int i = 0; i < num; i++) {
string name = pool_name_prefix;
append_rand_alpha(pool_name_prefix, name, 8);
names.push_back(name);
@@ -98,18 +101,8 @@ static int generate_preallocated_pools(vector<string>& pools)
return 0;
}
-static int generate_pool(string& bucket_name, rgw_bucket& bucket)
+static int register_available_pools(vector<string>& pools)
{
- vector<string> pools;
- int ret = generate_preallocated_pools(pools);
- if (ret < 0) {
- RGW_LOG(0) << "generate_preallocad_pools returned " << ret << dendl;
- return ret;
- }
- bucket.pool = pools.back();
- pools.pop_back();
- bucket.name = bucket_name;
-
map<string, bufferlist> m;
vector<string>::iterator iter;
@@ -119,7 +112,7 @@ static int generate_pool(string& bucket_name, rgw_bucket& bucket)
m[name] = bl;
}
rgw_obj obj(pi_buckets, avail_pools);
- ret = rgwstore->tmap_set(obj, m);
+ int ret = rgwstore->tmap_set(obj, m);
if (ret == -ENOENT) {
rgw_bucket new_bucket;
map<string,bufferlist> attrs;
@@ -136,6 +129,26 @@ static int generate_pool(string& bucket_name, rgw_bucket& bucket)
return 0;
}
+static int generate_pool(string& bucket_name, rgw_bucket& bucket)
+{
+ vector<string> pools;
+ int ret = generate_preallocated_pools(pools, g_conf->rgw_pools_preallocate_max);
+ if (ret < 0) {
+ RGW_LOG(0) << "generate_preallocad_pools returned " << ret << dendl;
+ return ret;
+ }
+ bucket.pool = pools.back();
+ pools.pop_back();
+ bucket.name = bucket_name;
+
+ ret = register_available_pools(pools);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
static int withdraw_pool(string& pool_name)
{
rgw_obj obj(pi_buckets, avail_pools);
@@ -143,15 +156,45 @@ static int withdraw_pool(string& pool_name)
return rgwstore->tmap_set(obj, pool_name, bl);
}
+int rgw_bucket_maintain_pools()
+{
+ bufferlist header;
+ map<string, bufferlist> m;
+ string pool_name;
+
+ rgw_obj obj(pi_buckets, avail_pools);
+ int ret = rgwstore->tmap_get(obj, header, m);
+ if (ret < 0 && ret != -ENOENT) {
+ return ret;
+ }
+
+ if ((int)m.size() < g_conf->rgw_pools_preallocate_threshold) {
+ RGW_LOG(0) << "rgw_bucket_maintain_pools allocating pools (m.size()=" << m.size() << " threshold="
+ << g_conf->rgw_pools_preallocate_threshold << ")" << dendl;
+ vector<string> pools;
+ ret = generate_preallocated_pools(pools, g_conf->rgw_pools_preallocate_max - m.size());
+ if (ret < 0) {
+ RGW_LOG(0) << "failed to preallocate pools" << dendl;
+ return ret;
+ }
+ ret = register_available_pools(pools);
+ if (ret < 0) {
+ RGW_LOG(0) << "failed to register available pools" << dendl;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
int rgw_bucket_allocate_pool(string& bucket_name, rgw_bucket& bucket)
{
- bufferlist bl;
bufferlist header;
map<string, bufferlist> m;
string pool_name;
rgw_obj obj(pi_buckets, avail_pools);
- int ret = rgwstore->tmap_get(obj, bl);
+ int ret = rgwstore->tmap_get(obj, header, m);
if (ret < 0) {
if (ret == -ENOENT) {
return generate_pool(bucket_name, bucket);
@@ -159,10 +202,6 @@ int rgw_bucket_allocate_pool(string& bucket_name, rgw_bucket& bucket)
return ret;
}
- bufferlist::iterator iter = bl.begin();
- ::decode(header, iter);
- ::decode(m, iter);
-
if (!m.size()) {
return generate_pool(bucket_name, bucket);
}
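
Note: bucket pools are no longer only created on demand; rgw_bucket_maintain_pools() tops the set of preallocated bucket pools back up whenever the available count drops below rgw_pools_preallocate_threshold, up to rgw_pools_preallocate_max. A minimal sketch of that policy using the helpers defined above (maintain_sketch is illustrative):

    static int maintain_sketch(size_t available) {
      if ((int)available >= g_conf->rgw_pools_preallocate_threshold)
        return 0;                                   // enough spare pools
      int to_create = g_conf->rgw_pools_preallocate_max - (int)available;
      std::vector<std::string> pools;
      int r = generate_preallocated_pools(pools, to_create);
      if (r < 0)
        return r;
      return register_available_pools(pools);       // record them in the avail tmap
    }
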
diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h
index 63c339c1402..91706f56455 100644
--- a/src/rgw/rgw_bucket.h
+++ b/src/rgw/rgw_bucket.h
@@ -17,6 +17,7 @@ extern int rgw_bucket_allocate_pool(string& bucket_name, rgw_bucket& bucket);
extern int rgw_create_bucket(std::string& id, string& bucket_name, rgw_bucket& bucket,
map<std::string, bufferlist>& attrs, bool exclusive = true, uint64_t auid = 0);
+extern int rgw_bucket_maintain_pools(void);
#endif
diff --git a/src/rgw/rgw_cache.h b/src/rgw/rgw_cache.h
index d5c8c11ed13..8a2ab0dbaa4 100644
--- a/src/rgw/rgw_cache.h
+++ b/src/rgw/rgw_cache.h
@@ -320,9 +320,12 @@ int RGWCache<T>::watch_cb(int opcode, uint64_t ver, bufferlist& bl)
try {
bufferlist::iterator iter = bl.begin();
::decode(info, iter);
- } catch (buffer::end_of_buffer *err) {
+ } catch (buffer::end_of_buffer& err) {
RGW_LOG(0) << "ERROR: got bad notification" << dendl;
return -EIO;
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: buffer::error" << dendl;
+ return -EIO;
}
string name = normal_name(info.obj);
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index abb644efeac..c1db8fa59c6 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -263,7 +263,10 @@ bool verify_permission(RGWAccessControlPolicy *policy, string& uid, int user_per
if (!policy)
return false;
- int acl_perm = policy->get_perm(g_ceph_context, uid, perm) & user_perm_mask;
+ int policy_perm = policy->get_perm(g_ceph_context, uid, perm);
+ int acl_perm = policy_perm & user_perm_mask;
+
+ RGW_LOG(10) << " uid=" << uid << " requested perm (type)=" << perm << ", policy perm=" << policy_perm << ", user_perm_mask=" << user_perm_mask << ", acl perm=" << acl_perm << dendl;
return (perm == acl_perm);
}
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 6f4b77e2094..4f309bc3e1d 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -67,8 +67,8 @@ extern string rgw_obj_category_none;
#define RGW_FORMAT_XML 1
#define RGW_FORMAT_JSON 2
-#define RGW_REST_OPENSTACK 0x1
-#define RGW_REST_OPENSTACK_AUTH 0x2
+#define RGW_REST_SWIFT 0x1
+#define RGW_REST_SWIFT_AUTH 0x2
#define RGW_SUSPENDED_USER_AUID (uint64_t)-2
@@ -112,6 +112,7 @@ extern string rgw_obj_category_none;
#define ERR_LENGTH_REQUIRED 2011
#define ERR_REQUEST_TIME_SKEWED 2012
#define ERR_USER_SUSPENDED 2100
+#define ERR_INTERNAL_ERROR 2200
typedef void *RGWAccessHandle;
@@ -271,8 +272,8 @@ struct RGWUserInfo
string user_id;
string display_name;
string user_email;
- string openstack_name;
- string openstack_key;
+ string swift_name;
+ string swift_key;
map<string, RGWAccessKey> access_keys;
map<string, RGWSubUser> subusers;
__u8 suspended;
@@ -295,8 +296,8 @@ struct RGWUserInfo
::encode(secret_key, bl);
::encode(display_name, bl);
::encode(user_email, bl);
- ::encode(openstack_name, bl);
- ::encode(openstack_key, bl);
+ ::encode(swift_name, bl);
+ ::encode(swift_key, bl);
::encode(user_id, bl);
::encode(access_keys, bl);
::encode(subusers, bl);
@@ -319,8 +320,8 @@ struct RGWUserInfo
}
::decode(display_name, bl);
::decode(user_email, bl);
- if (ver >= 3) ::decode(openstack_name, bl);
- if (ver >= 4) ::decode(openstack_key, bl);
+ if (ver >= 3) ::decode(swift_name, bl);
+ if (ver >= 4) ::decode(swift_key, bl);
if (ver >= 5)
::decode(user_id, bl);
else
@@ -481,7 +482,7 @@ struct req_state {
string bucket_name_str;
string object_str;
- map<string, string> x_amz_map;
+ map<string, string> x_meta_map;
RGWUserInfo user;
RGWAccessControlPolicy *acl;
diff --git a/src/rgw/rgw_formats.h b/src/rgw/rgw_formats.h
index 5d7ee0139f4..c43b24b050d 100644
--- a/src/rgw/rgw_formats.h
+++ b/src/rgw/rgw_formats.h
@@ -13,7 +13,7 @@ struct plain_stack_entry {
};
/* FIXME: this class is mis-named.
- * FIXME: This was a hack to send certain openstack messages.
+ * FIXME: This was a hack to send certain swift messages.
* There is a much better way to do this.
*/
class RGWFormatter_Plain : public Formatter {
diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc
index 8836c52a2cc..a610e82d8da 100644
--- a/src/rgw/rgw_log.cc
+++ b/src/rgw/rgw_log.cc
@@ -17,6 +17,7 @@ static void set_param_str(struct req_state *s, const char *name, string& str)
int rgw_log_op(struct req_state *s)
{
struct rgw_log_entry entry;
+ uint64_t pool_id;
if (!s->should_log)
return 0;
@@ -26,8 +27,13 @@ int rgw_log_op(struct req_state *s)
return -EINVAL;
}
if (s->err.ret == -ERR_NO_SUCH_BUCKET) {
- RGW_LOG(0) << "bucket " << s->bucket << " doesn't exist, not logging" << dendl;
- return 0;
+ if (!g_conf->rgw_log_nonexistent_bucket) {
+ RGW_LOG(0) << "bucket " << s->bucket << " doesn't exist, not logging" << dendl;
+ return 0;
+ }
+ pool_id = 0;
+ } else {
+ pool_id = s->pool_id;
}
entry.bucket = s->bucket_name;
@@ -67,7 +73,7 @@ int rgw_log_op(struct req_state *s)
entry.http_status = "200"; // default
entry.error_code = s->err.s3_code;
- entry.pool_id = s->pool_id;
+ entry.pool_id = pool_id;
bufferlist bl;
::encode(entry, bl);
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index 3a6b833a524..0a2e108fabc 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -17,14 +17,16 @@
#include "common/config.h"
#include "common/errno.h"
#include "common/WorkQueue.h"
+#include "common/Timer.h"
#include "rgw_common.h"
#include "rgw_access.h"
#include "rgw_acl.h"
#include "rgw_user.h"
#include "rgw_op.h"
#include "rgw_rest.h"
-#include "rgw_os.h"
+#include "rgw_swift.h"
#include "rgw_log.h"
+#include "rgw_bucket.h"
#include <map>
#include <string>
@@ -218,6 +220,16 @@ done:
RGW_LOG(0) << "====== req done fcgx=" << hex << fcgx << dec << " http_status=" << http_ret << " ======" << dendl;
}
+class C_RGWMaintenanceTick : public Context {
+ SafeTimer *timer;
+public:
+ C_RGWMaintenanceTick(SafeTimer *t) : timer(t) {}
+ void finish(int r) {
+ rgw_bucket_maintain_pools();
+ RGW_LOG(20) << "C_RGWMaintenanceTick::finish()" << dendl;
+ timer->add_event_after(g_conf->rgw_maintenance_tick_interval, new C_RGWMaintenanceTick(timer));
+ }
+};
/*
* start up the RADOS connection and then handle HTTP messages as they come in
*/
@@ -253,10 +265,22 @@ int main(int argc, const char **argv)
return EIO;
}
- RGWProcess process(g_ceph_context, 20);
+ RGWProcess process(g_ceph_context, g_conf->rgw_thread_pool_size);
+
+ Mutex lock("rgw_timer_lock");
+ SafeTimer timer(g_ceph_context, lock);
+
+ lock.Lock();
+ timer.init();
+ timer.add_event_after(g_conf->rgw_maintenance_tick_interval, new C_RGWMaintenanceTick(&timer));
+ lock.Unlock();
process.run();
+ lock.Lock();
+ timer.shutdown();
+ lock.Unlock();
+
return 0;
}
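
Note: the radosgw process now drives that maintenance from a SafeTimer: a one-shot event runs rgw_bucket_maintain_pools() and re-arms itself from finish(), giving a periodic tick every rgw_maintenance_tick_interval seconds, while the timer lock is held for init/add_event_after/shutdown as SafeTimer requires. A minimal sketch of the self-rescheduling event (the class name is illustrative; it mirrors C_RGWMaintenanceTick above):

    class MaintenanceTick : public Context {
      SafeTimer *timer;
    public:
      explicit MaintenanceTick(SafeTimer *t) : timer(t) {}
      void finish(int) {
        rgw_bucket_maintain_pools();                        // do the work
        timer->add_event_after(g_conf->rgw_maintenance_tick_interval,
                               new MaintenanceTick(timer)); // re-arm
      }
    };
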
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index b357fb2683a..2fe571d56ed 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -114,7 +114,7 @@ static void format_xattr(std::string &xattr)
void get_request_metadata(struct req_state *s, map<string, bufferlist>& attrs)
{
map<string, string>::iterator iter;
- for (iter = s->x_amz_map.begin(); iter != s->x_amz_map.end(); ++iter) {
+ for (iter = s->x_meta_map.begin(); iter != s->x_meta_map.end(); ++iter) {
const string &name(iter->first);
string &xattr(iter->second);
#define X_AMZ_META "x-amz-meta"
@@ -148,7 +148,12 @@ static int get_policy_from_attr(void *ctx, RGWAccessControlPolicy *policy, rgw_o
if (ret >= 0) {
bufferlist::iterator iter = bl.begin();
- policy->decode(iter);
+ try {
+ policy->decode(iter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "error: could not decode policy, caught buffer::error" << dendl;
+ return -EIO;
+ }
if (g_conf->rgw_log >= 15) {
RGW_LOG(15) << "Read AccessControlPolicy" << dendl;
policy->to_xml(cerr);
@@ -327,7 +332,7 @@ int RGWListBuckets::verify_permission()
void RGWListBuckets::execute()
{
- ret = rgw_read_user_buckets(s->user.user_id, buckets, !!(s->prot_flags & RGW_REST_OPENSTACK));
+ ret = rgw_read_user_buckets(s->user.user_id, buckets, !!(s->prot_flags & RGW_REST_SWIFT));
if (ret < 0) {
/* hmm.. something wrong here.. the user was authenticated, so it
should exist, just try to recreate */
@@ -407,7 +412,7 @@ void RGWListBucket::execute()
}
url_decode(s->args.get("delimiter"), delimiter);
- if (s->prot_flags & RGW_REST_OPENSTACK) {
+ if (s->prot_flags & RGW_REST_SWIFT) {
string path_args;
url_decode(s->args.get("path"), path_args);
if (!path_args.empty()) {
@@ -421,7 +426,7 @@ void RGWListBucket::execute()
}
ret = rgwstore->list_objects(s->user.user_id, s->bucket, max, prefix, delimiter, marker, objs, common_prefixes,
- !!(s->prot_flags & RGW_REST_OPENSTACK), no_ns, &is_truncated, NULL);
+ !!(s->prot_flags & RGW_REST_SWIFT), no_ns, &is_truncated, NULL);
done:
send_response();
@@ -749,7 +754,6 @@ void RGWPutObj::execute()
goto done_err;
bl.clear();
- map<string, bufferlist> meta_attrs;
RGWUploadPartInfo info;
string p = "part.";
p.append(part_num);
@@ -758,13 +762,10 @@ void RGWPutObj::execute()
info.size = s->obj_size;
info.modified = ceph_clock_now(g_ceph_context);
::encode(info, bl);
- meta_attrs[p] = bl;
rgw_obj meta_obj(s->bucket, multipart_meta_obj, s->object_str, mp_ns);
- // we don't set a category, since by now a category should have already been assigned
- string nocategory;
- ret = rgwstore->put_obj_meta(s->obj_ctx, s->user.user_id, meta_obj, NULL, meta_attrs, nocategory, false);
+ ret = rgwstore->tmap_set(meta_obj, p, bl);
}
}
done:
@@ -1191,11 +1192,12 @@ done:
}
static int get_multiparts_info(struct req_state *s, string& meta_oid, map<uint32_t, RGWUploadPartInfo>& parts,
- RGWAccessControlPolicy& policy, map<string, bufferlist>& new_attrs)
+ RGWAccessControlPolicy& policy, map<string, bufferlist>& attrs)
{
void *handle;
- map<string, bufferlist> attrs;
+ map<string, bufferlist> parts_map;
map<string, bufferlist>::iterator iter;
+ bufferlist header;
rgw_obj obj(s->bucket, meta_oid, s->object_str, mp_ns);
@@ -1206,24 +1208,35 @@ static int get_multiparts_info(struct req_state *s, string& meta_oid, map<uint32
if (ret < 0)
return ret;
+ ret = rgwstore->tmap_get(obj, header, parts_map);
+ if (ret < 0)
+ return ret;
+
for (iter = attrs.begin(); iter != attrs.end(); ++iter) {
string name = iter->first;
if (name.compare(RGW_ATTR_ACL) == 0) {
bufferlist& bl = iter->second;
bufferlist::iterator bli = bl.begin();
- ::decode(policy, bli);
- new_attrs[RGW_ATTR_ACL] = bl;
- continue;
- }
- if (name.compare(0, 5, "part.") != 0) {
- new_attrs[iter->first] = iter->second;
- continue;
+ try {
+ ::decode(policy, bli);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: could not decode policy, caught buffer::error" << dendl;
+ return -EIO;
+ }
+ break;
}
+ }
+
+ for (iter = parts_map.begin(); iter != parts_map.end(); ++iter) {
bufferlist& bl = iter->second;
bufferlist::iterator bli = bl.begin();
RGWUploadPartInfo info;
- ::decode(info, bli);
+ try {
+ ::decode(info, bli);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: could not decode policy, caught buffer::error" << dendl;
+ }
parts[info.num] = info;
}
return 0;
@@ -1454,7 +1467,7 @@ void RGWListBucketMultiparts::execute()
if (ret < 0)
goto done;
- if (s->prot_flags & RGW_REST_OPENSTACK) {
+ if (s->prot_flags & RGW_REST_SWIFT) {
string path_args;
url_decode(s->args.get("path"), path_args);
if (!path_args.empty()) {
@@ -1468,7 +1481,7 @@ void RGWListBucketMultiparts::execute()
}
marker_meta = marker.get_meta();
ret = rgwstore->list_objects(s->user.user_id, s->bucket, max_uploads, prefix, delimiter, marker_meta, objs, common_prefixes,
- !!(s->prot_flags & RGW_REST_OPENSTACK), mp_ns, &is_truncated, &mp_filter);
+ !!(s->prot_flags & RGW_REST_SWIFT), mp_ns, &is_truncated, &mp_filter);
if (objs.size()) {
vector<RGWObjEnt>::iterator iter;
RGWMultipartUploadEntry entry;
diff --git a/src/rgw/rgw_os_auth.h b/src/rgw/rgw_os_auth.h
deleted file mode 100644
index 4a31b3dca5c..00000000000
--- a/src/rgw/rgw_os_auth.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef CEPH_RGW_OS_AUTH_H
-#define CEPH_RGW_OS_AUTH_H
-
-#include "rgw_op.h"
-
-#define RGW_OS_TOKEN_EXPIRATION (15 * 60)
-
-extern int rgw_os_verify_signed_token(const char *token, RGWUserInfo& info);
-
-class RGW_OS_Auth_Get : public RGWOp {
-public:
- RGW_OS_Auth_Get() {}
- ~RGW_OS_Auth_Get() {}
-
- int verify_permission() { return 0; }
- void execute();
-};
-
-class RGWHandler_OS_Auth : public RGWHandler {
-public:
- RGWHandler_OS_Auth() {}
- ~RGWHandler_OS_Auth() {}
- RGWOp *get_op();
- void put_op(RGWOp *op);
-
- int authorize();
- int read_permissions() { return 0; }
-};
-
-#endif
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index f9bc8e6c5a2..404bddb33fb 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -280,7 +280,12 @@ int RGWRados::list_objects(string& id, rgw_bucket& bucket, int max, string& pref
bufferlist& bl = iter->second;
bufferlist::iterator i = bl.begin();
RGWAccessControlPolicy policy;
- policy.decode_owner(i);
+ try {
+ policy.decode_owner(i);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: could not decode policy for oid=" << oid << ", caught buffer::error" << dendl;
+ continue;
+ }
ACLOwner& owner = policy.get_owner();
obj.owner = owner.get_id();
obj.owner_display_name = owner.get_display_name();
@@ -793,16 +798,26 @@ int RGWRados::get_obj_state(RGWRadosCtx *rctx, rgw_obj& obj, librados::IoCtx& io
s->exists = true;
bufferlist::iterator oiter = outbl.begin();
- ::decode(s->attrset, oiter);
+ try {
+ ::decode(s->attrset, oiter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: failed decoding s->attrset (obj=" << obj << "), aborting" << dendl;
+ return -EIO;
+ }
map<string, bufferlist>::iterator aiter;
for (aiter = s->attrset.begin(); aiter != s->attrset.end(); ++aiter) {
RGW_LOG(0) << "iter->first=" << aiter->first << dendl;
}
- ::decode(s->size, oiter);
- utime_t ut;
- ::decode(ut, oiter);
- s->mtime = ut.sec();
+
+ try {
+ ::decode(s->size, oiter);
+ utime_t ut;
+ ::decode(ut, oiter);
+ s->mtime = ut.sec();
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: failed decoding object (obj=" << obj << ") info (either size or mtime), aborting" << dendl;
+ }
s->has_attrs = true;
map<string, bufferlist>::iterator iter = s->attrset.find(RGW_ATTR_SHADOW_OBJ);
@@ -1473,8 +1488,9 @@ int RGWRados::get_bucket_stats(rgw_bucket& bucket, map<string, RGWBucketStats>&
return 0;
}
-int RGWRados::tmap_get(rgw_obj& obj, bufferlist& bl)
+int RGWRados::tmap_get(rgw_obj& obj, bufferlist& header, std::map<string, bufferlist>& m)
{
+ bufferlist bl;
librados::IoCtx io_ctx;
rgw_bucket& bucket = obj.bucket;
std::string& oid = obj.object;
@@ -1485,8 +1501,19 @@ int RGWRados::tmap_get(rgw_obj& obj, bufferlist& bl)
io_ctx.locator_set_key(obj.key);
r = io_ctx.tmap_get(oid, bl);
+ if (r < 0)
+ return r;
- return r;
+ try {
+ bufferlist::iterator iter = bl.begin();
+ ::decode(header, iter);
+ ::decode(m, iter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: tmap_get failed, caught buffer::error" << dendl;
+ return -EIO;
+ }
+
+ return 0;
}
@@ -1590,13 +1617,13 @@ int RGWRados::update_containers_stats(map<string, RGWBucketEnt>& m)
int count = 0;
map<string, RGWBucketEnt>::iterator iter;
- list<string> buckets_list;
+ list<string> pools_list;
for (iter = m.begin(); iter != m.end(); ++iter) {
- string bucket_name = iter->first;
- buckets_list.push_back(bucket_name);
+ string pool_name = iter->second.bucket.pool;
+ pools_list.push_back(pool_name);
}
map<std::string,librados::stats_map> sm;
- int r = rados->get_pool_stats(buckets_list, rgw_obj_category_main, sm);
+ int r = rados->get_pool_stats(pools_list, rgw_obj_category_main, sm);
if (r < 0)
return r;
@@ -1605,6 +1632,8 @@ int RGWRados::update_containers_stats(map<string, RGWBucketEnt>& m)
for (miter = sm.begin(), iter = m.begin(); miter != sm.end(), iter != m.end(); ++iter, ++miter) {
stats_map stats = miter->second;
stats_map::iterator stats_iter = stats.begin();
+ if (stats_iter == stats.end())
+ continue;
string bucket_name = miter->first;
RGWBucketEnt& ent = iter->second;
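
Note: RGWRados::tmap_get() now decodes the tmap blob itself, handing callers the header and a key-to-bufferlist map and returning -EIO when the stored blob cannot be decoded; rgw_bucket.cc and the multipart code above were converted to the new signature. A minimal sketch of a caller, using the pool-index object names from rgw_bucket.cc (load_available_pools is illustrative):

    static int load_available_pools(std::map<std::string, bufferlist> &entries) {
      bufferlist header;
      rgw_obj obj(pi_buckets, avail_pools);
      int r = rgwstore->tmap_get(obj, header, entries);
      if (r == -ENOENT) {
        entries.clear();          // no tmap written yet: treat as empty
        return 0;
      }
      return r;                   // 0 on success, or the error (e.g. -EIO)
    }
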
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index a78ff5a5d9a..bcc96835e29 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -2,10 +2,12 @@
#define CEPH_RGWRADOS_H
#include "include/rados/librados.hpp"
+#include "include/Context.h"
#include "rgw_access.h"
#include "rgw_common.h"
class RGWWatcher;
+class SafeTimer;
struct RGWObjState {
bool is_atomic;
@@ -77,6 +79,19 @@ class RGWRados : public RGWAccess
int set_buckets_auid(vector<rgw_bucket>& buckets, uint64_t auid);
+ Mutex lock;
+ SafeTimer *timer;
+
+ class C_Tick : public Context {
+ RGWRados *rados;
+ public:
+ C_Tick(RGWRados *_r) : rados(_r) {}
+ void finish(int r) {
+ rados->tick();
+ }
+ };
+
+
RGWWatcher *watcher;
uint64_t watch_handle;
librados::IoCtx root_pool_ctx;
@@ -106,7 +121,9 @@ class RGWRados : public RGWAccess
pair<string, bufferlist> *cmp_xattr);
int delete_obj_impl(void *ctx, std::string& id, rgw_obj& src_obj, bool sync);
public:
- RGWRados() : watcher(NULL), watch_handle(0) {}
+ RGWRados() : lock("rados_timer_lock"), timer(NULL), watcher(NULL), watch_handle(0) {}
+
+ void tick();
/** Initialize the RADOS instance and prepare to do other ops */
virtual int initialize(CephContext *cct);
@@ -228,7 +245,7 @@ public:
virtual int get_bucket_id(rgw_bucket& bucket, uint64_t *bucket_id);
virtual bool supports_tmap() { return true; }
- virtual int tmap_get(rgw_obj& obj, bufferlist& bl);
+ virtual int tmap_get(rgw_obj& obj, bufferlist& header, std::map<string, bufferlist>& m);
virtual int tmap_set(rgw_obj& obj, std::string& key, bufferlist& bl);
virtual int tmap_set(rgw_obj& obj, map<std::string, bufferlist>& m);
virtual int tmap_create(rgw_obj& obj, std::string& key, bufferlist& bl);
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
index 13be0dcaa83..9cf35c555af 100644
--- a/src/rgw/rgw_rest.cc
+++ b/src/rgw/rgw_rest.cc
@@ -7,9 +7,9 @@
#include "rgw_formats.h"
#include "rgw_op.h"
#include "rgw_rest.h"
-#include "rgw_rest_os.h"
+#include "rgw_rest_swift.h"
#include "rgw_rest_s3.h"
-#include "rgw_os_auth.h"
+#include "rgw_swift_auth.h"
#include "rgw_formats.h"
@@ -52,6 +52,7 @@ const static struct rgw_html_errors RGW_HTML_ERRORS[] = {
{ EEXIST, 409, "BucketAlreadyExists" },
{ ENOTEMPTY, 409, "BucketNotEmpty" },
{ ERANGE, 416, "InvalidRange" },
+ { ERR_INTERNAL_ERROR, 500, "InternalError" },
};
void set_req_state_err(struct req_state *s, int err_no)
@@ -97,7 +98,7 @@ void dump_content_length(struct req_state *s, size_t len)
void dump_etag(struct req_state *s, const char *etag)
{
- if (s->prot_flags & RGW_REST_OPENSTACK)
+ if (s->prot_flags & RGW_REST_SWIFT)
CGI_PRINTF(s,"etag: %s\n", etag);
else
CGI_PRINTF(s,"ETag: \"%s\"\n", etag);
@@ -428,13 +429,13 @@ void init_entities_from_header(struct req_state *s)
pos = req.find('/');
if (pos >= 0) {
- const char *openstack_url_prefix = s->env->get("RGW_OPENSTACK_URL_PREFIX");
- bool cut_url = (openstack_url_prefix != NULL);
- if (!openstack_url_prefix)
- openstack_url_prefix = "v1";
+ const char *swift_url_prefix = s->env->get("RGW_SWIFT_URL_PREFIX");
+ bool cut_url = (swift_url_prefix != NULL);
+ if (!swift_url_prefix)
+ swift_url_prefix = "v1";
first = req.substr(0, pos);
- if (first.compare(openstack_url_prefix) == 0) {
- s->prot_flags |= RGW_REST_OPENSTACK;
+ if (first.compare(swift_url_prefix) == 0) {
+ s->prot_flags |= RGW_REST_SWIFT;
if (cut_url) {
next_tok(req, first, '/');
}
@@ -443,7 +444,7 @@ void init_entities_from_header(struct req_state *s)
first = req;
}
- if (s->prot_flags & RGW_REST_OPENSTACK) {
+ if (s->prot_flags & RGW_REST_SWIFT) {
s->format = 0;
delete s->formatter;
s->formatter = new RGWFormatter_Plain;
@@ -459,7 +460,7 @@ void init_entities_from_header(struct req_state *s)
}
}
- if (s->prot_flags & RGW_REST_OPENSTACK) {
+ if (s->prot_flags & RGW_REST_SWIFT) {
string ver;
string auth_key;
@@ -495,7 +496,7 @@ void init_entities_from_header(struct req_state *s)
}
if (strcmp(s->bucket_name, "auth") == 0)
- s->prot_flags |= RGW_REST_OPENSTACK_AUTH;
+ s->prot_flags |= RGW_REST_SWIFT_AUTH;
if (pos >= 0) {
string encoded_obj_str = req.substr(pos+1);
@@ -545,44 +546,44 @@ static void init_auth_info(struct req_state *s)
{
const char *p;
- s->x_amz_map.clear();
+ s->x_meta_map.clear();
for (int i=0; (p = s->fcgx->envp[i]); ++i) {
#define HTTP_X_AMZ "HTTP_X_AMZ"
if (strncmp(p, HTTP_X_AMZ, sizeof(HTTP_X_AMZ) - 1) == 0) {
- RGW_LOG(10) << "amz>> " << p << dendl;
- const char *amz = p+5; /* skip the HTTP_ part */
- const char *eq = strchr(amz, '=');
+ RGW_LOG(10) << "meta>> " << p << dendl;
+ const char *name = p+5; /* skip the HTTP_ part */
+ const char *eq = strchr(name, '=');
if (!eq) /* shouldn't happen! */
continue;
- int len = eq - amz;
- char amz_low[len + 1];
+ int len = eq - name;
+ char name_low[len + 1];
int j;
for (j=0; j<len; j++) {
- amz_low[j] = tolower(amz[j]);
- if (amz_low[j] == '_')
- amz_low[j] = '-';
+ name_low[j] = tolower(name[j]);
+ if (name_low[j] == '_')
+ name_low[j] = '-';
}
- amz_low[j] = 0;
+ name_low[j] = 0;
string val;
line_unfold(eq + 1, val);
map<string, string>::iterator iter;
- iter = s->x_amz_map.find(amz_low);
- if (iter != s->x_amz_map.end()) {
+ iter = s->x_meta_map.find(name_low);
+ if (iter != s->x_meta_map.end()) {
string old = iter->second;
int pos = old.find_last_not_of(" \t"); /* get rid of any whitespaces after the value */
old = old.substr(0, pos + 1);
old.append(",");
old.append(val);
- s->x_amz_map[amz_low] = old;
+ s->x_meta_map[name_low] = old;
} else {
- s->x_amz_map[amz_low] = val;
+ s->x_meta_map[name_low] = val;
}
}
}
map<string, string>::iterator iter;
- for (iter = s->x_amz_map.begin(); iter != s->x_amz_map.end(); ++iter) {
+ for (iter = s->x_meta_map.begin(); iter != s->x_meta_map.end(); ++iter) {
RGW_LOG(10) << "x>> " << iter->first << ":" << iter->second << dendl;
}
@@ -808,8 +809,8 @@ RGWOp *RGWHandler_REST::get_op()
RGWRESTMgr::RGWRESTMgr()
{
- m_os_handler = new RGWHandler_REST_OS;
- m_os_auth_handler = new RGWHandler_OS_Auth;
+ m_os_handler = new RGWHandler_REST_SWIFT;
+ m_os_auth_handler = new RGWHandler_SWIFT_Auth;
m_s3_handler = new RGWHandler_REST_S3;
}
@@ -827,9 +828,9 @@ RGWHandler *RGWRESTMgr::get_handler(struct req_state *s, FCGX_Request *fcgx,
*init_error = RGWHandler_REST::preprocess(s, fcgx);
- if (s->prot_flags & RGW_REST_OPENSTACK)
+ if (s->prot_flags & RGW_REST_SWIFT)
handler = m_os_handler;
- else if (s->prot_flags & RGW_REST_OPENSTACK_AUTH)
+ else if (s->prot_flags & RGW_REST_SWIFT_AUTH)
handler = m_os_auth_handler;
else
handler = m_s3_handler;
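
The init_auth_info() hunk above generalizes the old x_amz_map into x_meta_map: each CGI variable that starts with HTTP_X_AMZ is lower-cased, underscores become dashes, and repeated headers are folded into one comma-separated value. A standalone sketch of just that normalization step (the helper name and toy input are illustrative only):

    #include <cctype>
    #include <iostream>
    #include <map>
    #include <string>

    // Normalize a CGI-style variable such as "X_AMZ_META_COLOR" into the
    // header form "x-amz-meta-color" and merge repeated values with commas,
    // as init_auth_info() does when building x_meta_map.
    static void add_meta(std::map<std::string, std::string>& meta,
                         const std::string& raw_name, const std::string& val) {
      std::string name;
      name.reserve(raw_name.size());
      for (char c : raw_name) {
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        name.push_back(c == '_' ? '-' : c);
      }
      auto it = meta.find(name);
      if (it != meta.end()) {
        it->second.append(",");          // repeated header: append to prior value
        it->second.append(val);
      } else {
        meta[name] = val;
      }
    }

    int main() {
      std::map<std::string, std::string> meta;
      add_meta(meta, "X_AMZ_META_COLOR", "red");
      add_meta(meta, "X_AMZ_META_COLOR", "blue");
      for (const auto& kv : meta)
        std::cout << kv.first << ": " << kv.second << "\n";  // x-amz-meta-color: red,blue
      return 0;
    }
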
diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h
index 5bac7e74470..d94d29952d8 100644
--- a/src/rgw/rgw_rest.h
+++ b/src/rgw/rgw_rest.h
@@ -147,13 +147,13 @@ public:
virtual int authorize() = 0;
};
-class RGWHandler_REST_OS;
-class RGWHandler_OS_Auth;
+class RGWHandler_REST_SWIFT;
+class RGWHandler_SWIFT_Auth;
class RGWHandler_REST_S3;
class RGWRESTMgr {
- RGWHandler_REST_OS *m_os_handler;
- RGWHandler_OS_Auth *m_os_auth_handler;
+ RGWHandler_REST_SWIFT *m_os_handler;
+ RGWHandler_SWIFT_Auth *m_os_auth_handler;
RGWHandler_REST_S3 *m_s3_handler;
public:
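
RGWRESTMgr keeps one preallocated handler per front end, and get_handler() in rgw_rest.cc picks among them from the protocol flags that preprocess() set on the request. A small standalone sketch of that flag-based dispatch (the enum values and struct names are placeholders, not the RGW definitions):

    #include <iostream>

    // Placeholder protocol flags; the real values live in the RGW headers.
    enum {
      REST_SWIFT      = 0x1,
      REST_SWIFT_AUTH = 0x2,
    };

    struct Handler { const char *name; };

    // Mirrors RGWRESTMgr::get_handler(): one handler object per protocol,
    // selected by the request's protocol flags, with S3 as the default.
    struct RESTMgr {
      Handler swift{"swift"}, swift_auth{"swift-auth"}, s3{"s3"};
      Handler *get_handler(unsigned prot_flags) {
        if (prot_flags & REST_SWIFT)
          return &swift;
        if (prot_flags & REST_SWIFT_AUTH)
          return &swift_auth;
        return &s3;
      }
    };

    int main() {
      RESTMgr mgr;
      std::cout << mgr.get_handler(REST_SWIFT)->name << "\n";  // swift
      std::cout << mgr.get_handler(0)->name << "\n";           // s3
      return 0;
    }
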
diff --git a/src/rgw/rgw_rest_os.h b/src/rgw/rgw_rest_os.h
deleted file mode 100644
index 3c5c346c345..00000000000
--- a/src/rgw/rgw_rest_os.h
+++ /dev/null
@@ -1,116 +0,0 @@
-#ifndef CEPH_RGW_REST_OS_H
-#define CEPH_RGW_REST_OS_H
-#define TIME_BUF_SIZE 128
-
-#include "rgw_op.h"
-#include "rgw_rest.h"
-
-class RGWGetObj_REST_OS : public RGWGetObj_REST {
-public:
- RGWGetObj_REST_OS() {}
- ~RGWGetObj_REST_OS() {}
-
- int send_response(void *handle);
-};
-
-class RGWListBuckets_REST_OS : public RGWListBuckets_REST {
-public:
- RGWListBuckets_REST_OS() {}
- ~RGWListBuckets_REST_OS() {}
-
- void send_response();
-};
-
-class RGWListBucket_REST_OS : public RGWListBucket_REST {
-public:
- RGWListBucket_REST_OS() {
- limit_opt_name = "limit";
- default_max = 10000;
- }
- ~RGWListBucket_REST_OS() {}
-
- void send_response();
-};
-
-class RGWStatBucket_REST_OS : public RGWStatBucket_REST {
-public:
- RGWStatBucket_REST_OS() {}
- ~RGWStatBucket_REST_OS() {}
-
- void send_response();
-};
-
-class RGWCreateBucket_REST_OS : public RGWCreateBucket_REST {
-public:
- RGWCreateBucket_REST_OS() {}
- ~RGWCreateBucket_REST_OS() {}
-
- void send_response();
-};
-
-class RGWDeleteBucket_REST_OS : public RGWDeleteBucket_REST {
-public:
- RGWDeleteBucket_REST_OS() {}
- ~RGWDeleteBucket_REST_OS() {}
-
- void send_response();
-};
-
-class RGWPutObj_REST_OS : public RGWPutObj_REST {
-public:
- RGWPutObj_REST_OS() {}
- ~RGWPutObj_REST_OS() {}
-
- void send_response();
-};
-
-class RGWDeleteObj_REST_OS : public RGWDeleteObj_REST {
-public:
- RGWDeleteObj_REST_OS() {}
- ~RGWDeleteObj_REST_OS() {}
-
- void send_response();
-};
-
-class RGWCopyObj_REST_OS : public RGWCopyObj_REST {
-public:
- RGWCopyObj_REST_OS() {}
- ~RGWCopyObj_REST_OS() {}
-
- void send_response() {}
-};
-
-class RGWGetACLs_REST_OS : public RGWGetACLs_REST {
-public:
- RGWGetACLs_REST_OS() {}
- ~RGWGetACLs_REST_OS() {}
-
- void send_response() {}
-};
-
-class RGWPutACLs_REST_OS : public RGWPutACLs_REST {
-public:
- RGWPutACLs_REST_OS() : RGWPutACLs_REST() {}
- virtual ~RGWPutACLs_REST_OS() {}
-
- void send_response() {}
-};
-
-
-class RGWHandler_REST_OS : public RGWHandler_REST {
-protected:
-
- RGWOp *get_retrieve_obj_op(bool get_data);
- RGWOp *get_retrieve_op(bool get_data);
- RGWOp *get_create_op();
- RGWOp *get_delete_op();
- RGWOp *get_post_op() { return NULL; }
-
-public:
- RGWHandler_REST_OS() : RGWHandler_REST() {}
- virtual ~RGWHandler_REST_OS() {}
-
- int authorize();
-};
-
-#endif
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index e74e7b3ccee..4aa379ce4e9 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -489,7 +489,7 @@ static void get_canon_amz_hdr(struct req_state *s, string& dest)
{
dest = "";
map<string, string>::iterator iter;
- for (iter = s->x_amz_map.begin(); iter != s->x_amz_map.end(); ++iter) {
+ for (iter = s->x_meta_map.begin(); iter != s->x_meta_map.end(); ++iter) {
dest.append(iter->first);
dest.append(":");
dest.append(iter->second);
diff --git a/src/rgw/rgw_rest_os.cc b/src/rgw/rgw_rest_swift.cc
index 3c346bb3ff9..694e47fff95 100644
--- a/src/rgw/rgw_rest_os.cc
+++ b/src/rgw/rgw_rest_swift.cc
@@ -1,11 +1,11 @@
#include "common/Formatter.h"
-#include "rgw_os.h"
-#include "rgw_rest_os.h"
+#include "rgw_swift.h"
+#include "rgw_rest_swift.h"
#include <sstream>
-void RGWListBuckets_REST_OS::send_response()
+void RGWListBuckets_REST_SWIFT::send_response()
{
set_req_state_err(s, ret);
dump_errno(s);
@@ -57,7 +57,7 @@ void RGWListBuckets_REST_OS::send_response()
s->formatter->reset();
}
-void RGWListBucket_REST_OS::send_response()
+void RGWListBucket_REST_SWIFT::send_response()
{
set_req_state_err(s, (ret < 0 ? ret : 0));
dump_errno(s);
@@ -125,7 +125,7 @@ static void dump_container_metadata(struct req_state *s, RGWBucketEnt& bucket)
CGI_PRINTF(s,"X-Container-Bytes-Used: %s\n", buf);
}
-void RGWStatBucket_REST_OS::send_response()
+void RGWStatBucket_REST_SWIFT::send_response()
{
if (ret >= 0)
dump_container_metadata(s, bucket);
@@ -139,7 +139,7 @@ void RGWStatBucket_REST_OS::send_response()
flush_formatter_to_req_state(s, s->formatter);
}
-void RGWCreateBucket_REST_OS::send_response()
+void RGWCreateBucket_REST_SWIFT::send_response()
{
if (ret)
set_req_state_err(s, ret);
@@ -148,7 +148,7 @@ void RGWCreateBucket_REST_OS::send_response()
flush_formatter_to_req_state(s, s->formatter);
}
-void RGWDeleteBucket_REST_OS::send_response()
+void RGWDeleteBucket_REST_SWIFT::send_response()
{
int r = ret;
if (!r)
@@ -160,7 +160,7 @@ void RGWDeleteBucket_REST_OS::send_response()
flush_formatter_to_req_state(s, s->formatter);
}
-void RGWPutObj_REST_OS::send_response()
+void RGWPutObj_REST_SWIFT::send_response()
{
if (!ret)
ret = 201; // "created"
@@ -171,7 +171,7 @@ void RGWPutObj_REST_OS::send_response()
flush_formatter_to_req_state(s, s->formatter);
}
-void RGWDeleteObj_REST_OS::send_response()
+void RGWDeleteObj_REST_SWIFT::send_response()
{
int r = ret;
if (!r)
@@ -183,7 +183,7 @@ void RGWDeleteObj_REST_OS::send_response()
flush_formatter_to_req_state(s, s->formatter);
}
-int RGWGetObj_REST_OS::send_response(void *handle)
+int RGWGetObj_REST_SWIFT::send_response(void *handle)
{
const char *content_type = NULL;
int orig_ret = ret;
@@ -239,14 +239,14 @@ send_data:
return 0;
}
-RGWOp *RGWHandler_REST_OS::get_retrieve_obj_op(bool get_data)
+RGWOp *RGWHandler_REST_SWIFT::get_retrieve_obj_op(bool get_data)
{
if (is_acl_op()) {
- return new RGWGetACLs_REST_OS;
+ return new RGWGetACLs_REST_SWIFT;
}
if (s->object) {
- RGWGetObj_REST_OS *get_obj_op = new RGWGetObj_REST_OS;
+ RGWGetObj_REST_SWIFT *get_obj_op = new RGWGetObj_REST_SWIFT;
get_obj_op->set_get_data(get_data);
return get_obj_op;
} else if (!s->bucket_name) {
@@ -254,54 +254,56 @@ RGWOp *RGWHandler_REST_OS::get_retrieve_obj_op(bool get_data)
}
if (get_data)
- return new RGWListBucket_REST_OS;
+ return new RGWListBucket_REST_SWIFT;
else
- return new RGWStatBucket_REST_OS;
+ return new RGWStatBucket_REST_SWIFT;
}
-RGWOp *RGWHandler_REST_OS::get_retrieve_op(bool get_data)
+RGWOp *RGWHandler_REST_SWIFT::get_retrieve_op(bool get_data)
{
if (s->bucket_name) {
if (is_acl_op()) {
- return new RGWGetACLs_REST_OS;
+ return new RGWGetACLs_REST_SWIFT;
}
return get_retrieve_obj_op(get_data);
}
- return new RGWListBuckets_REST_OS;
+ return new RGWListBuckets_REST_SWIFT;
}
-RGWOp *RGWHandler_REST_OS::get_create_op()
+RGWOp *RGWHandler_REST_SWIFT::get_create_op()
{
if (is_acl_op()) {
- return new RGWPutACLs_REST_OS;
+ return new RGWPutACLs_REST_SWIFT;
} else if (s->object) {
if (!s->copy_source)
- return new RGWPutObj_REST_OS;
+ return new RGWPutObj_REST_SWIFT;
else
- return new RGWCopyObj_REST_OS;
+ return new RGWCopyObj_REST_SWIFT;
} else if (s->bucket_name) {
- return new RGWCreateBucket_REST_OS;
+ return new RGWCreateBucket_REST_SWIFT;
}
return NULL;
}
-RGWOp *RGWHandler_REST_OS::get_delete_op()
+RGWOp *RGWHandler_REST_SWIFT::get_delete_op()
{
if (s->object)
- return new RGWDeleteObj_REST_OS;
+ return new RGWDeleteObj_REST_SWIFT;
else if (s->bucket_name)
- return new RGWDeleteBucket_REST_OS;
+ return new RGWDeleteBucket_REST_SWIFT;
return NULL;
}
-int RGWHandler_REST_OS::authorize()
+int RGWHandler_REST_SWIFT::authorize()
{
bool authorized = rgw_verify_os_token(s);
if (!authorized)
return -EPERM;
+ s->perm_mask = RGW_PERM_FULL_CONTROL;
+
return 0;
}
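
Beyond the rename, the Swift response methods mainly translate a zero internal result into the protocol's own success status before the shared error/status dumping runs; the PUT path above rewrites 0 to 201 ("created"). A tiny standalone sketch of that translate-then-dump flow (the op enum and the 204 for deletes are assumptions here; only the 201 appears in the patch):

    #include <cerrno>
    #include <iostream>

    enum class SwiftOp { put_obj, delete_obj, other };

    // A zero internal result is rewritten to the protocol-specific success
    // status; real errors pass through unchanged to the error dumper.
    static int swift_status(SwiftOp op, int ret) {
      if (ret)
        return ret;
      switch (op) {
      case SwiftOp::put_obj:    return 201;  // "created", as in the patch
      case SwiftOp::delete_obj: return 204;  // assumed: Swift's "no content"
      default:                  return 200;
      }
    }

    int main() {
      std::cout << swift_status(SwiftOp::put_obj, 0) << "\n";       // 201
      std::cout << swift_status(SwiftOp::delete_obj, 0) << "\n";    // 204
      std::cout << swift_status(SwiftOp::put_obj, -ENOENT) << "\n"; // error passes through
      return 0;
    }
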
diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h
new file mode 100644
index 00000000000..c9bb6ff51aa
--- /dev/null
+++ b/src/rgw/rgw_rest_swift.h
@@ -0,0 +1,116 @@
+#ifndef CEPH_RGW_REST_SWIFT_H
+#define CEPH_RGW_REST_SWIFT_H
+#define TIME_BUF_SIZE 128
+
+#include "rgw_op.h"
+#include "rgw_rest.h"
+
+class RGWGetObj_REST_SWIFT : public RGWGetObj_REST {
+public:
+ RGWGetObj_REST_SWIFT() {}
+ ~RGWGetObj_REST_SWIFT() {}
+
+ int send_response(void *handle);
+};
+
+class RGWListBuckets_REST_SWIFT : public RGWListBuckets_REST {
+public:
+ RGWListBuckets_REST_SWIFT() {}
+ ~RGWListBuckets_REST_SWIFT() {}
+
+ void send_response();
+};
+
+class RGWListBucket_REST_SWIFT : public RGWListBucket_REST {
+public:
+ RGWListBucket_REST_SWIFT() {
+ limit_opt_name = "limit";
+ default_max = 10000;
+ }
+ ~RGWListBucket_REST_SWIFT() {}
+
+ void send_response();
+};
+
+class RGWStatBucket_REST_SWIFT : public RGWStatBucket_REST {
+public:
+ RGWStatBucket_REST_SWIFT() {}
+ ~RGWStatBucket_REST_SWIFT() {}
+
+ void send_response();
+};
+
+class RGWCreateBucket_REST_SWIFT : public RGWCreateBucket_REST {
+public:
+ RGWCreateBucket_REST_SWIFT() {}
+ ~RGWCreateBucket_REST_SWIFT() {}
+
+ void send_response();
+};
+
+class RGWDeleteBucket_REST_SWIFT : public RGWDeleteBucket_REST {
+public:
+ RGWDeleteBucket_REST_SWIFT() {}
+ ~RGWDeleteBucket_REST_SWIFT() {}
+
+ void send_response();
+};
+
+class RGWPutObj_REST_SWIFT : public RGWPutObj_REST {
+public:
+ RGWPutObj_REST_SWIFT() {}
+ ~RGWPutObj_REST_SWIFT() {}
+
+ void send_response();
+};
+
+class RGWDeleteObj_REST_SWIFT : public RGWDeleteObj_REST {
+public:
+ RGWDeleteObj_REST_SWIFT() {}
+ ~RGWDeleteObj_REST_SWIFT() {}
+
+ void send_response();
+};
+
+class RGWCopyObj_REST_SWIFT : public RGWCopyObj_REST {
+public:
+ RGWCopyObj_REST_SWIFT() {}
+ ~RGWCopyObj_REST_SWIFT() {}
+
+ void send_response() {}
+};
+
+class RGWGetACLs_REST_SWIFT : public RGWGetACLs_REST {
+public:
+ RGWGetACLs_REST_SWIFT() {}
+ ~RGWGetACLs_REST_SWIFT() {}
+
+ void send_response() {}
+};
+
+class RGWPutACLs_REST_SWIFT : public RGWPutACLs_REST {
+public:
+ RGWPutACLs_REST_SWIFT() : RGWPutACLs_REST() {}
+ virtual ~RGWPutACLs_REST_SWIFT() {}
+
+ void send_response() {}
+};
+
+
+class RGWHandler_REST_SWIFT : public RGWHandler_REST {
+protected:
+
+ RGWOp *get_retrieve_obj_op(bool get_data);
+ RGWOp *get_retrieve_op(bool get_data);
+ RGWOp *get_create_op();
+ RGWOp *get_delete_op();
+ RGWOp *get_post_op() { return NULL; }
+
+public:
+ RGWHandler_REST_SWIFT() : RGWHandler_REST() {}
+ virtual ~RGWHandler_REST_SWIFT() {}
+
+ int authorize();
+};
+
+#endif
diff --git a/src/rgw/rgw_os.cc b/src/rgw/rgw_swift.cc
index edecf55f70b..29894c00c4a 100644
--- a/src/rgw/rgw_os.cc
+++ b/src/rgw/rgw_swift.cc
@@ -6,8 +6,8 @@
#include <curl/easy.h>
#include "rgw_common.h"
-#include "rgw_os.h"
-#include "rgw_os_auth.h"
+#include "rgw_swift.h"
+#include "rgw_swift_auth.h"
#include "rgw_user.h"
@@ -15,7 +15,7 @@ static size_t read_http_header(void *ptr, size_t size, size_t nmemb, void *_info
{
size_t len = size * nmemb;
char line[len + 1];
- struct rgw_os_auth_info *info = (struct rgw_os_auth_info *)_info;
+ struct rgw_swift_auth_info *info = (struct rgw_swift_auth_info *)_info;
char *s = (char *)ptr, *end = (char *)ptr + len;
char *p = line;
@@ -56,14 +56,14 @@ static size_t read_http_header(void *ptr, size_t size, size_t nmemb, void *_info
return len;
}
-static int rgw_os_validate_token(const char *token, struct rgw_os_auth_info *info)
+static int rgw_swift_validate_token(const char *token, struct rgw_swift_auth_info *info)
{
CURL *curl_handle;
string auth_url = "http://127.0.0.1:11000/token";
char url_buf[auth_url.size() + 1 + strlen(token) + 1];
sprintf(url_buf, "%s/%s", auth_url.c_str(), token);
- RGW_LOG(10) << "rgw_os_validate_token url=" << url_buf << dendl;
+ RGW_LOG(10) << "rgw_swift_validate_token url=" << url_buf << dendl;
curl_handle = curl_easy_init();
@@ -83,37 +83,37 @@ static int rgw_os_validate_token(const char *token, struct rgw_os_auth_info *inf
bool rgw_verify_os_token(req_state *s)
{
if (strncmp(s->os_auth_token, "AUTH_rgwtk", 10) == 0) {
- int ret = rgw_os_verify_signed_token(s->os_auth_token, s->user);
+ int ret = rgw_swift_verify_signed_token(s->os_auth_token, s->user);
if (ret < 0)
return false;
return true;
}
- struct rgw_os_auth_info info;
+ struct rgw_swift_auth_info info;
memset(&info, 0, sizeof(info));
info.status = 401; // start with access denied, validate_token might change that
- int ret = rgw_os_validate_token(s->os_auth_token, &info);
+ int ret = rgw_swift_validate_token(s->os_auth_token, &info);
if (ret < 0)
return ret;
if (!info.user) {
- RGW_LOG(0) << "openstack auth didn't authorize a user" << dendl;
+ RGW_LOG(0) << "swift auth didn't authorize a user" << dendl;
return false;
}
s->os_user = info.user;
s->os_groups = info.auth_groups;
- string openstack_user = s->os_user;
+ string swift_user = s->os_user;
- RGW_LOG(10) << "openstack user=" << s->os_user << dendl;
+ RGW_LOG(10) << "swift user=" << s->os_user << dendl;
- if (rgw_get_user_info_by_openstack(openstack_user, s->user) < 0) {
- RGW_LOG(0) << "couldn't map openstack user" << dendl;
+ if (rgw_get_user_info_by_swift(swift_user, s->user) < 0) {
+ RGW_LOG(0) << "couldn't map swift user" << dendl;
return false;
}
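
rgw_verify_os_token() keeps two validation paths: a locally signed "AUTH_rgwtk" token is verified in-process, anything else goes to the external Swift auth service, and the authenticated Swift user is then mapped to an RGW user. A standalone sketch of that two-path check (the token check, the service call, and the user lookup below are stand-ins, not the actual protocol or RGW calls):

    #include <iostream>
    #include <optional>
    #include <string>

    // Placeholder for the signed-token verification (the real code rebuilds
    // and compares a token over the embedded user, nonce and expiration).
    static bool verify_signed_token(const std::string& token) {
      return token.size() > 10;
    }

    // Placeholder for the HTTP round trip to the Swift auth service.
    static std::optional<std::string> ask_auth_service(const std::string& token) {
      if (token == "valid-remote-token")
        return std::string("joe");
      return std::nullopt;
    }

    static bool verify_token(const std::string& token, std::string& rgw_user) {
      if (token.compare(0, 10, "AUTH_rgwtk") == 0)
        return verify_signed_token(token);   // local, signed-token path
      std::optional<std::string> swift_user = ask_auth_service(token);
      if (!swift_user) {
        std::cerr << "swift auth didn't authorize a user\n";
        return false;
      }
      rgw_user = *swift_user;                // would be rgw_get_user_info_by_swift()
      return true;
    }

    int main() {
      std::string user;
      std::cout << verify_token("valid-remote-token", user)
                << " user=" << user << "\n";
      return 0;
    }
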
diff --git a/src/rgw/rgw_os.h b/src/rgw/rgw_swift.h
index 2114ecaa294..16204bcb2d9 100644
--- a/src/rgw/rgw_os.h
+++ b/src/rgw/rgw_swift.h
@@ -1,11 +1,11 @@
-#ifndef CEPH_RGW_OS_H
-#define CEPH_RGW_OS_H
+#ifndef CEPH_RGW_SWIFT_H
+#define CEPH_RGW_SWIFT_H
#include "rgw_common.h"
-struct rgw_os_auth_info {
+struct rgw_swift_auth_info {
int status;
char *auth_groups;
char *user;
diff --git a/src/rgw/rgw_os_auth.cc b/src/rgw/rgw_swift_auth.cc
index a382ea6f102..afcd03d68bc 100644
--- a/src/rgw/rgw_os_auth.cc
+++ b/src/rgw/rgw_swift_auth.cc
@@ -1,4 +1,4 @@
-#include "rgw_os_auth.h"
+#include "rgw_swift_auth.h"
#include "rgw_rest.h"
#include "common/ceph_crypto.h"
@@ -8,7 +8,7 @@
using namespace ceph::crypto;
-static RGW_OS_Auth_Get rgw_os_auth_get;
+static RGW_SWIFT_Auth_Get rgw_swift_auth_get;
static int build_token(string& os_user, string& key, uint64_t nonce, utime_t& expiration, bufferlist& bl)
{
@@ -45,14 +45,14 @@ static int encode_token(string& os_user, string& key, bufferlist& bl)
return ret;
utime_t expiration = ceph_clock_now(g_ceph_context);
- expiration += RGW_OS_TOKEN_EXPIRATION; // 15 minutes
+ expiration += RGW_SWIFT_TOKEN_EXPIRATION; // 15 minutes
ret = build_token(os_user, key, nonce, expiration, bl);
return ret;
}
-int rgw_os_verify_signed_token(const char *token, RGWUserInfo& info)
+int rgw_swift_verify_signed_token(const char *token, RGWUserInfo& info)
{
if (strncmp(token, "AUTH_rgwtk", 10) != 0)
return -EINVAL;
@@ -83,7 +83,7 @@ int rgw_os_verify_signed_token(const char *token, RGWUserInfo& info)
::decode(os_user, iter);
::decode(nonce, iter);
::decode(expiration, iter);
- } catch (buffer::error *err) {
+ } catch (buffer::error& err) {
RGW_LOG(0) << "failed to decode token: caught exception" << dendl;
return -EINVAL;
}
@@ -92,13 +92,13 @@ int rgw_os_verify_signed_token(const char *token, RGWUserInfo& info)
return -EPERM;
}
- if ((ret = rgw_get_user_info_by_openstack(os_user, info)) < 0)
+ if ((ret = rgw_get_user_info_by_swift(os_user, info)) < 0)
return ret;
RGW_LOG(10) << "os_user=" << os_user << dendl;
bufferlist tok;
- ret = build_token(os_user, info.openstack_key, nonce, expiration, tok);
+ ret = build_token(os_user, info.swift_key, nonce, expiration, tok);
if (ret < 0)
return ret;
@@ -117,23 +117,23 @@ int rgw_os_verify_signed_token(const char *token, RGWUserInfo& info)
return 0;
}
-void RGW_OS_Auth_Get::execute()
+void RGW_SWIFT_Auth_Get::execute()
{
int ret = -EPERM;
- RGW_LOG(20) << "RGW_OS_Auth_Get::execute()" << dendl;
+ RGW_LOG(20) << "RGW_SWIFT_Auth_Get::execute()" << dendl;
const char *key = s->env->get("HTTP_X_AUTH_KEY");
const char *user = s->env->get("HTTP_X_AUTH_USER");
- const char *url_prefix = s->env->get("RGW_OPENSTACK_URL_PREFIX");
- const char *os_url = s->env->get("RGW_OPENSTACK_URL");
+ const char *url_prefix = s->env->get("RGW_SWIFT_URL_PREFIX");
+ const char *os_url = s->env->get("RGW_SWIFT_URL");
string user_str = user;
RGWUserInfo info;
bufferlist bl;
if (!os_url || !url_prefix) {
- RGW_LOG(0) << "server is misconfigured, missing RGW_OPENSTACK_URL_PREFIX or RGW_OPENSTACK_URL" << dendl;
+ RGW_LOG(0) << "server is misconfigured, missing RGW_SWIFT_URL_PREFIX or RGW_SWIFT_URL" << dendl;
ret = -EINVAL;
goto done;
}
@@ -141,18 +141,18 @@ void RGW_OS_Auth_Get::execute()
if (!key || !user)
goto done;
- if ((ret = rgw_get_user_info_by_openstack(user_str, info)) < 0)
+ if ((ret = rgw_get_user_info_by_swift(user_str, info)) < 0)
goto done;
- if (info.openstack_key.compare(key) != 0) {
- RGW_LOG(0) << "RGW_OS_Auth_Get::execute(): bad openstack key" << dendl;
+ if (info.swift_key.compare(key) != 0) {
+ RGW_LOG(0) << "RGW_SWIFT_Auth_Get::execute(): bad swift key" << dendl;
ret = -EPERM;
goto done;
}
CGI_PRINTF(s, "X-Storage-Url: %s/%s/v1/AUTH_rgw\n", os_url, url_prefix);
- if ((ret = encode_token(info.openstack_name, info.openstack_key, bl)) < 0)
+ if ((ret = encode_token(info.swift_name, info.swift_key, bl)) < 0)
goto done;
{
@@ -170,17 +170,17 @@ done:
end_header(s);
}
-int RGWHandler_OS_Auth::authorize()
+int RGWHandler_SWIFT_Auth::authorize()
{
return 0;
}
-RGWOp *RGWHandler_OS_Auth::get_op()
+RGWOp *RGWHandler_SWIFT_Auth::get_op()
{
RGWOp *op;
switch (s->op) {
case OP_GET:
- op = &rgw_os_auth_get;
+ op = &rgw_swift_auth_get;
break;
default:
return NULL;
@@ -192,7 +192,7 @@ RGWOp *RGWHandler_OS_Auth::get_op()
return op;
}
-void RGWHandler_OS_Auth::put_op(RGWOp *op)
+void RGWHandler_SWIFT_Auth::put_op(RGWOp *op)
{
}
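
Besides the rename, this file fixes the decode path to catch buffer::error by reference instead of by pointer; ::decode() throws the exception object by value, so a catch-by-pointer clause never matches and the exception would escape the handler. A standalone illustration of the difference, using a plain std::runtime_error-derived type in place of buffer::error:

    #include <iostream>
    #include <stdexcept>

    struct decode_error : std::runtime_error {
      decode_error() : std::runtime_error("bad token") {}
    };

    // Throws by value, the way Ceph's ::decode() throws buffer::error.
    static void decode_token() { throw decode_error(); }

    int main() {
      // Catch by reference: this handler matches and the error is contained,
      // so the caller can return -EINVAL as the patched code does.
      try {
        decode_token();
      } catch (decode_error& err) {
        std::cout << "caught by reference: " << err.what() << "\n";
      }

      // A `catch (decode_error *err)` clause, as in the pre-fix code, only
      // matches a thrown pointer, so the same exception would propagate past
      // it and take down the request instead of being turned into an error.
      return 0;
    }
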
diff --git a/src/rgw/rgw_swift_auth.h b/src/rgw/rgw_swift_auth.h
new file mode 100644
index 00000000000..95909a96015
--- /dev/null
+++ b/src/rgw/rgw_swift_auth.h
@@ -0,0 +1,30 @@
+#ifndef CEPH_RGW_SWIFT_AUTH_H
+#define CEPH_RGW_SWIFT_AUTH_H
+
+#include "rgw_op.h"
+
+#define RGW_SWIFT_TOKEN_EXPIRATION (15 * 60)
+
+extern int rgw_swift_verify_signed_token(const char *token, RGWUserInfo& info);
+
+class RGW_SWIFT_Auth_Get : public RGWOp {
+public:
+ RGW_SWIFT_Auth_Get() {}
+ ~RGW_SWIFT_Auth_Get() {}
+
+ int verify_permission() { return 0; }
+ void execute();
+};
+
+class RGWHandler_SWIFT_Auth : public RGWHandler {
+public:
+ RGWHandler_SWIFT_Auth() {}
+ ~RGWHandler_SWIFT_Auth() {}
+ RGWOp *get_op();
+ void put_op(RGWOp *op);
+
+ int authorize();
+ int read_permissions() { return 0; }
+};
+
+#endif
diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc
index 98317ca2015..f47123144fe 100644
--- a/src/rgw/rgw_user.cc
+++ b/src/rgw/rgw_user.cc
@@ -15,7 +15,7 @@ using namespace std;
static rgw_bucket ui_key_bucket(USER_INFO_POOL_NAME);
static rgw_bucket ui_email_bucket(USER_INFO_EMAIL_POOL_NAME);
-static rgw_bucket ui_openstack_bucket(USER_INFO_OPENSTACK_POOL_NAME);
+static rgw_bucket ui_swift_bucket(USER_INFO_SWIFT_POOL_NAME);
static rgw_bucket ui_uid_bucket(USER_INFO_UID_POOL_NAME);
static rgw_bucket pi_pool_bucket(POOL_INFO_POOL_NAME);
@@ -49,12 +49,12 @@ int rgw_store_user_info(RGWUserInfo& info)
int ret;
map<string,bufferlist> attrs;
- if (info.openstack_name.size()) {
- /* check if openstack mapping exists */
+ if (info.swift_name.size()) {
+ /* check if swift mapping exists */
RGWUserInfo inf;
- int r = rgw_get_user_info_by_openstack(info.openstack_name, inf);
+ int r = rgw_get_user_info_by_swift(info.swift_name, inf);
if (r >= 0 && inf.user_id.compare(info.user_id) != 0) {
- RGW_LOG(0) << "can't store user info, openstack id already mapped to another user" << dendl;
+ RGW_LOG(0) << "can't store user info, swift id already mapped to another user" << dendl;
return -EEXIST;
}
}
@@ -99,8 +99,8 @@ int rgw_store_user_info(RGWUserInfo& info)
}
}
- if (info.openstack_name.size())
- ret = rgw_put_obj(info.user_id, ui_openstack_bucket, info.openstack_name, uid_bl.c_str(), uid_bl.length());
+ if (info.swift_name.size())
+ ret = rgw_put_obj(info.user_id, ui_swift_bucket, info.swift_name, uid_bl.c_str(), uid_bl.length());
return ret;
}
@@ -115,9 +115,14 @@ int rgw_get_user_info_from_index(string& key, rgw_bucket& bucket, RGWUserInfo& i
return ret;
bufferlist::iterator iter = bl.begin();
- ::decode(uid, iter);
- if (!iter.end())
- info.decode(iter);
+ try {
+ ::decode(uid, iter);
+ if (!iter.end())
+ info.decode(iter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: failed to decode user info, caught buffer::error" << dendl;
+ return -EIO;
+ }
return 0;
}
@@ -141,12 +146,12 @@ int rgw_get_user_info_by_email(string& email, RGWUserInfo& info)
}
/**
- * Given an openstack username, finds the user_info associated with it.
+ * Given a swift username, finds the user_info associated with it.
* returns: 0 on success, -ERR# on failure (including nonexistence)
*/
-extern int rgw_get_user_info_by_openstack(string& openstack_name, RGWUserInfo& info)
+extern int rgw_get_user_info_by_swift(string& swift_name, RGWUserInfo& info)
{
- return rgw_get_user_info_from_index(openstack_name, ui_openstack_bucket, info);
+ return rgw_get_user_info_from_index(swift_name, ui_swift_bucket, info);
}
/**
@@ -173,7 +178,12 @@ static int rgw_read_buckets_from_attr(string& user_id, RGWUserBuckets& buckets)
return ret;
bufferlist::iterator iter = bl.begin();
- buckets.decode(iter);
+ try {
+ buckets.decode(iter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: failed to decode buckets info, caught buffer::error" << dendl;
+ return -EIO;
+ }
return 0;
}
@@ -230,13 +240,18 @@ int rgw_read_user_buckets(string user_id, RGWUserBuckets& buckets, bool need_sta
bufferlist::iterator p = bl.begin();
bufferlist header;
map<string,bufferlist> m;
- ::decode(header, p);
- ::decode(m, p);
- for (map<string,bufferlist>::iterator q = m.begin(); q != m.end(); q++) {
- bufferlist::iterator iter = q->second.begin();
- RGWBucketEnt bucket;
- ::decode(bucket, iter);
- buckets.add(bucket);
+ try {
+ ::decode(header, p);
+ ::decode(m, p);
+ for (map<string,bufferlist>::iterator q = m.begin(); q != m.end(); q++) {
+ bufferlist::iterator iter = q->second.begin();
+ RGWBucketEnt bucket;
+ ::decode(bucket, iter);
+ buckets.add(bucket);
+ }
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: failed to decode bucket information, caught buffer::error" << dendl;
+ return -EIO;
}
} else {
ret = rgw_read_buckets_from_attr(user_id, buckets);
@@ -392,9 +407,9 @@ int rgw_remove_email_index(string& uid, string& email)
return ret;
}
-int rgw_remove_openstack_name_index(string& uid, string& openstack_name)
+int rgw_remove_swift_name_index(string& uid, string& swift_name)
{
- rgw_obj obj(ui_openstack_bucket, openstack_name);
+ rgw_obj obj(ui_swift_bucket, swift_name);
int ret = rgwstore->delete_obj(NULL, uid, obj);
return ret;
}
@@ -421,34 +436,49 @@ int rgw_delete_user(RGWUserInfo& info, bool purge_data) {
}
map<string, RGWAccessKey>::iterator kiter = info.access_keys.begin();
for (; kiter != info.access_keys.end(); ++kiter) {
+ RGW_LOG(0) << "removing key index: " << kiter->first << dendl;
ret = rgw_remove_key_index(kiter->second);
- if (ret < 0 && ret != -ENOENT)
- RGW_LOG(0) << "ERROR: could not remove " << kiter->first << " (access key object), should be fixed manually (err=" << ret << ")" << dendl;
+ if (ret < 0 && ret != -ENOENT) {
+ RGW_LOG(0) << "ERROR: could not remove " << kiter->first << " (access key object), should be fixed (err=" << ret << ")" << dendl;
+ return ret;
+ }
}
- rgw_obj uid_obj(ui_uid_bucket, info.user_id);
- ret = rgwstore->delete_obj(NULL, info.user_id, uid_obj);
- if (ret < 0 && ret != -ENOENT)
- RGW_LOG(0) << "ERROR: could not remove " << info.user_id << ":" << uid_obj << ", should be fixed manually (err=" << ret << ")" << dendl;
-
- string buckets_obj_id;
- get_buckets_obj(info.user_id, buckets_obj_id);
- rgw_obj uid_bucks(ui_uid_bucket, buckets_obj_id);
- ret = rgwstore->delete_obj(NULL, info.user_id, uid_bucks);
- if (ret < 0 && ret != -ENOENT)
- RGW_LOG(0) << "ERROR: could not remove " << info.user_id << ":" << uid_bucks << ", should be fixed manually (err=" << ret << ")" << dendl;
-
-
rgw_obj email_obj(ui_email_bucket, info.user_email);
+ RGW_LOG(0) << "removing email index: " << info.user_email << dendl;
ret = rgwstore->delete_obj(NULL, info.user_id, email_obj);
- if (ret < 0 && ret != -ENOENT)
- RGW_LOG(0) << "ERROR: could not remove " << info.user_id << ":" << email_obj << ", should be fixed manually (err=" << ret << ")" << dendl;
+ if (ret < 0 && ret != -ENOENT) {
+ RGW_LOG(0) << "ERROR: could not remove " << info.user_id << ":" << email_obj << ", should be fixed (err=" << ret << ")" << dendl;
+ return ret;
+ }
if (purge_data) {
+ RGW_LOG(0) << "purging user buckets" << dendl;
ret = rgwstore->purge_buckets(info.user_id, buckets_vec);
- if (ret < 0)
+ if (ret < 0 && ret != -ENOENT) {
RGW_LOG(0) << "ERROR: delete_buckets returned " << ret << dendl;
+ return ret;
+ }
+ }
+
+ string buckets_obj_id;
+ get_buckets_obj(info.user_id, buckets_obj_id);
+ rgw_obj uid_bucks(ui_uid_bucket, buckets_obj_id);
+ RGW_LOG(0) << "removing user buckets index" << dendl;
+ ret = rgwstore->delete_obj(NULL, info.user_id, uid_bucks);
+ if (ret < 0 && ret != -ENOENT) {
+ RGW_LOG(0) << "ERROR: could not remove " << info.user_id << ":" << uid_bucks << ", should be fixed (err=" << ret << ")" << dendl;
+ return ret;
+ }
+
+ rgw_obj uid_obj(ui_uid_bucket, info.user_id);
+ RGW_LOG(0) << "removing user index: " << info.user_id << dendl;
+ ret = rgwstore->delete_obj(NULL, info.user_id, uid_obj);
+ if (ret < 0 && ret != -ENOENT) {
+ RGW_LOG(0) << "ERROR: could not remove " << info.user_id << ":" << uid_obj << ", should be fixed (err=" << ret << ")" << dendl;
+ return ret;
}
+
return 0;
}
@@ -485,7 +515,12 @@ int rgw_retrieve_pool_info(int64_t pool_id, RGWPoolInfo& pool_info)
return ret;
}
bufferlist::iterator iter = bl.begin();
- ::decode(pool_info, iter);
+ try {
+ ::decode(pool_info, iter);
+ } catch (buffer::error& err) {
+ RGW_LOG(0) << "ERROR: failed to decode pool information, caught buffer::error" << dendl;
+ return -EIO;
+ }
return 0;
}
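
The rgw_delete_user() rework above does more than log each step: every removal now aborts the sequence on a real error (tolerating -ENOENT), the optional bucket purge runs before the bucket-list and uid index objects are removed, and the uid index goes last so a partially deleted user can still be looked up and the deletion retried. A condensed standalone sketch of that ordering (the step functions are placeholders):

    #include <iostream>

    static int remove_key_indexes()   { std::cout << "remove key indexes\n";   return 0; }
    static int remove_email_index()   { std::cout << "remove email index\n";   return 0; }
    static int purge_buckets()        { std::cout << "purge buckets\n";        return 0; }
    static int remove_buckets_index() { std::cout << "remove buckets index\n"; return 0; }
    static int remove_uid_index()     { std::cout << "remove uid index\n";     return 0; }

    // Each step returns 0 or -errno; any failure stops the sequence, and the
    // uid index is removed last so the user record stays findable on failure.
    static int delete_user(bool purge_data) {
      int r;
      if ((r = remove_key_indexes()) < 0) return r;
      if ((r = remove_email_index()) < 0) return r;
      if (purge_data && (r = purge_buckets()) < 0) return r;
      if ((r = remove_buckets_index()) < 0) return r;
      if ((r = remove_uid_index()) < 0) return r;
      return 0;
    }

    int main() { return delete_user(true) < 0 ? 1 : 0; }
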
diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h
index c87ec8cf277..92b964f6941 100644
--- a/src/rgw/rgw_user.h
+++ b/src/rgw/rgw_user.h
@@ -11,7 +11,7 @@ using namespace std;
#define USER_INFO_POOL_NAME ".users"
#define USER_INFO_EMAIL_POOL_NAME ".users.email"
-#define USER_INFO_OPENSTACK_POOL_NAME ".users.openstack"
+#define USER_INFO_SWIFT_POOL_NAME ".users.swift"
#define USER_INFO_UID_POOL_NAME ".users.uid"
#define RGW_USER_ANON_ID "anonymous"
@@ -53,15 +53,15 @@ extern int rgw_store_user_info(RGWUserInfo& info);
*/
extern int rgw_get_user_info_by_uid(string& user_id, RGWUserInfo& info);
/**
- * Given an openstack username, finds the user info associated with it.
+ * Given a swift username, finds the user info associated with it.
* returns: 0 on success, -ERR# on failure (including nonexistence)
*/
extern int rgw_get_user_info_by_email(string& email, RGWUserInfo& info);
/**
- * Given an openstack username, finds the user info associated with it.
+ * Given a swift username, finds the user info associated with it.
* returns: 0 on success, -ERR# on failure (including nonexistence)
*/
-extern int rgw_get_user_info_by_openstack(string& openstack_name, RGWUserInfo& info);
+extern int rgw_get_user_info_by_swift(string& swift_name, RGWUserInfo& info);
/**
* Given an access key, finds the user info associated with it.
* returns: 0 on success, -ERR# on failure (including nonexistence)
@@ -147,7 +147,7 @@ extern int rgw_remove_bucket(string user_id, rgw_bucket& bucket, bool purge_data
extern int rgw_remove_key_index(RGWAccessKey& access_key);
extern int rgw_remove_uid_index(string& uid);
extern int rgw_remove_email_index(string& uid, string& email);
-extern int rgw_remove_openstack_name_index(string& uid, string& openstack_name);
+extern int rgw_remove_swift_name_index(string& uid, string& swift_name);
extern int rgw_store_pool_info(int64_t pool_id, RGWPoolInfo& pool_info);
extern int rgw_retrieve_pool_info(int64_t pool_id, RGWPoolInfo& pool_info);
diff --git a/src/test/cli/radosgw_admin/help.t b/src/test/cli/radosgw_admin/help.t
index adc6259534c..8860c3cdd02 100644
--- a/src/test/cli/radosgw_admin/help.t
+++ b/src/test/cli/radosgw_admin/help.t
@@ -27,11 +27,11 @@
--uid=<id> user id
--subuser=<name> subuser name
--access-key=<key> S3 access key
- --os-user=<group:name> OpenStack user
+ --swift-user=<group:name> Swift user
--email=<email>
--auth_uid=<auid> librados uid
--secret=<key> S3 key
- --os-secret=<key> OpenStack key
+ --swift-secret=<key> Swift key
--gen-access-key generate random access key
--gen-secret generate random secret key
--access=<access> Set access permissions for sub-user, should be one
diff --git a/src/test/store_test.cc b/src/test/store_test.cc
index a3c9f529e56..10fb6564dc2 100644
--- a/src/test/store_test.cc
+++ b/src/test/store_test.cc
@@ -253,7 +253,7 @@ public:
// hash
//boost::binomial_distribution<uint32_t> bin(0xFFFFFF, 0.5);
++seq;
- return hobject_t(name, CEPH_NOSNAP, rand());
+ return hobject_t(name, string(), CEPH_NOSNAP, rand());
}
};
@@ -469,7 +469,7 @@ TEST_F(StoreTest, HashCollisionTest) {
if (!(i % 5)) {
cerr << "Object " << i << std::endl;
}
- hobject_t hoid(string(buf) + base, CEPH_NOSNAP, 0);
+ hobject_t hoid(string(buf) + base, string(), CEPH_NOSNAP, 0);
{
ObjectStore::Transaction t;
t.touch(cid, hoid);