summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosh Durgin <josh.durgin@inktank.com>2013-07-24 18:14:29 -0700
committerJosh Durgin <josh.durgin@inktank.com>2013-07-25 18:44:10 -0700
commit02ec1a7502fe0aa612be6c088c4a0efda022da98 (patch)
treebeb6af50eeba386ba72339c01651face7acefbec
parenta4b9707e5b5d38b1e8929f52c5f71d5f9e519ceb (diff)
downloadceph-02ec1a7502fe0aa612be6c088c4a0efda022da98.tar.gz
ObjectCacher: store image_overlap in ObjectSet
Pass the image_overlap to the WritebackHandler functions that need it, may_copy_on_write() and write(), so that they don't need to take any locks to read the current overlap. This is a step towards changing the lock ordering in librbd so the cache lock may be taken inside of the snap and parent locks. Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r--src/client/Inode.h2
-rw-r--r--src/client/ObjecterWriteback.h5
-rw-r--r--src/librbd/ImageCtx.cc11
-rw-r--r--src/librbd/ImageCtx.h1
-rw-r--r--src/librbd/LibrbdWriteback.cc19
-rw-r--r--src/librbd/LibrbdWriteback.h5
-rw-r--r--src/librbd/internal.cc35
-rw-r--r--src/osdc/ObjectCacher.cc5
-rw-r--r--src/osdc/ObjectCacher.h11
-rw-r--r--src/osdc/WritebackHandler.h7
-rw-r--r--src/test/osdc/FakeWriteback.cc3
-rw-r--r--src/test/osdc/FakeWriteback.h5
-rw-r--r--src/test/osdc/object_cacher_stress.cc2
13 files changed, 69 insertions, 42 deletions
diff --git a/src/client/Inode.h b/src/client/Inode.h
index af4830b5c77..96450c7b85c 100644
--- a/src/client/Inode.h
+++ b/src/client/Inode.h
@@ -214,7 +214,7 @@ class Inode {
exporting_issued(0), exporting_mds(-1), exporting_mseq(0),
cap_item(this), flushing_cap_item(this), last_flush_tid(0),
snaprealm(0), snaprealm_item(this), snapdir_parent(0),
- oset((void *)this, newlayout->fl_pg_pool, ino),
+ oset((void *)this, newlayout->fl_pg_pool, ino, 0),
reported_size(0), wanted_max_size(0), requested_max_size(0),
_ref(0), ll_ref(0),
dir(0), dn_set()
diff --git a/src/client/ObjecterWriteback.h b/src/client/ObjecterWriteback.h
index 9a10fb48a06..9fd4de26d62 100644
--- a/src/client/ObjecterWriteback.h
+++ b/src/client/ObjecterWriteback.h
@@ -19,14 +19,15 @@ class ObjecterWriteback : public WritebackHandler {
trunc_size, trunc_seq, onfinish);
}
- virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid) {
+ virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid, uint64_t image_overlap) {
return false;
}
virtual tid_t write(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, const SnapContext& snapc,
const bufferlist &bl, utime_t mtime, uint64_t trunc_size,
- __u32 trunc_seq, Context *oncommit) {
+ __u32 trunc_seq, uint64_t image_overlap,
+ Context *oncommit) {
return m_objecter->write_trunc(oid, oloc, off, len, snapc, bl, mtime, 0,
trunc_size, trunc_seq, NULL, oncommit);
}
diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc
index 41518b67698..8ade3de7539 100644
--- a/src/librbd/ImageCtx.cc
+++ b/src/librbd/ImageCtx.cc
@@ -91,7 +91,7 @@ namespace librbd {
cct->_conf->rbd_cache_target_dirty,
cct->_conf->rbd_cache_max_dirty_age,
cct->_conf->rbd_cache_block_writes_upfront);
- object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0);
+ object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0, 0);
object_set->return_enoent = true;
object_cacher->start();
}
@@ -470,6 +470,15 @@ namespace librbd {
return 0;
}
+ void ImageCtx::refresh_overlap(uint64_t overlap)
+ {
+ if (!object_cacher)
+ return;
+ cache_lock.Lock();
+ object_set->rbd_image_overlap = overlap;
+ cache_lock.Unlock();
+ }
+
void ImageCtx::aio_read_from_cache(object_t o, bufferlist *bl, size_t len,
uint64_t off, Context *onfinish) {
snap_lock.get_read();
diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h
index 29ca2f197ea..cf8290edc8a 100644
--- a/src/librbd/ImageCtx.h
+++ b/src/librbd/ImageCtx.h
@@ -127,6 +127,7 @@ namespace librbd {
uint64_t get_parent_snap_id(librados::snap_t in_snap_id) const;
int get_parent_overlap(librados::snap_t in_snap_id,
uint64_t *overlap) const;
+ void refresh_overlap(uint64_t overlap);
void aio_read_from_cache(object_t o, bufferlist *bl, size_t len,
uint64_t off, Context *onfinish);
void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off,
diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc
index 737f1f33cd7..b8f4b536e9d 100644
--- a/src/librbd/LibrbdWriteback.cc
+++ b/src/librbd/LibrbdWriteback.cc
@@ -109,13 +109,8 @@ namespace librbd {
assert(r >= 0);
}
- bool LibrbdWriteback::may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid)
+ bool LibrbdWriteback::may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid, uint64_t image_overlap)
{
- m_ictx->parent_lock.get_read();
- uint64_t overlap = 0;
- m_ictx->get_parent_overlap(snapid, &overlap);
- m_ictx->parent_lock.put_read();
-
uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix);
// reverse map this object extent onto the parent
@@ -123,7 +118,8 @@ namespace librbd {
Striper::extent_to_file(m_ictx->cct, &m_ictx->layout,
object_no, 0, m_ictx->layout.fl_object_size,
objectx);
- uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
+ uint64_t object_overlap = m_ictx->prune_parent_extents(objectx,
+ image_overlap);
bool may = object_overlap > 0;
ldout(m_ictx->cct, 10) << "may_copy_on_write " << oid << " " << read_off << "~" << read_len << " = " << may << dendl;
return may;
@@ -135,13 +131,9 @@ namespace librbd {
const SnapContext& snapc,
const bufferlist &bl, utime_t mtime,
uint64_t trunc_size, __u32 trunc_seq,
+ uint64_t image_overlap,
Context *oncommit)
{
- m_ictx->parent_lock.get_read();
- uint64_t overlap = 0;
- m_ictx->get_parent_overlap(LIBRADOS_SNAP_HEAD, &overlap);
- m_ictx->parent_lock.put_read();
-
uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix);
// reverse map this object extent onto the parent
@@ -149,7 +141,8 @@ namespace librbd {
Striper::extent_to_file(m_ictx->cct, &m_ictx->layout,
object_no, 0, m_ictx->layout.fl_object_size,
objectx);
- uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
+ uint64_t object_overlap = m_ictx->prune_parent_extents(objectx,
+ image_overlap);
write_result_d *result = new write_result_d(oid.name, oncommit);
m_writes[oid.name].push(result);
ldout(m_ictx->cct, 20) << "write will wait for result " << result << dendl;
diff --git a/src/librbd/LibrbdWriteback.h b/src/librbd/LibrbdWriteback.h
index ba8ff1f114d..8d7d558d308 100644
--- a/src/librbd/LibrbdWriteback.h
+++ b/src/librbd/LibrbdWriteback.h
@@ -29,13 +29,14 @@ namespace librbd {
Context *onfinish);
// Determine whether a read to this extent could be affected by a write-triggered copy-on-write
- virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid);
+ virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid, uint64_t image_overlap);
// Note that oloc, trunc_size, and trunc_seq are ignored
virtual tid_t write(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, const SnapContext& snapc,
const bufferlist &bl, utime_t mtime, uint64_t trunc_size,
- __u32 trunc_seq, Context *oncommit);
+ __u32 trunc_seq, uint64_t image_overlap,
+ Context *oncommit);
struct write_result_d {
bool done;
diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index 1fd7942f3b8..bf8a0afc062 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc
@@ -2000,19 +2000,30 @@ reprotect_and_return_err:
int _snap_set(ImageCtx *ictx, const char *snap_name)
{
- RWLock::WLocker l1(ictx->snap_lock);
- RWLock::WLocker l2(ictx->parent_lock);
- int r;
- if ((snap_name != NULL) && (strlen(snap_name) != 0)) {
- r = ictx->snap_set(snap_name);
- } else {
- ictx->snap_unset();
- r = 0;
- }
- if (r < 0) {
- return r;
+ uint64_t overlap = 0;
+
+ {
+ RWLock::WLocker l1(ictx->snap_lock);
+ RWLock::WLocker l2(ictx->parent_lock);
+ int r;
+ if ((snap_name != NULL) && (strlen(snap_name) != 0)) {
+ r = ictx->snap_set(snap_name);
+ } else {
+ ictx->snap_unset();
+ r = 0;
+ }
+ if (r < 0) {
+ return r;
+ }
+ refresh_parent(ictx);
+ assert(0 == ictx->get_parent_overlap(ictx->snap_id, &overlap));
}
- refresh_parent(ictx);
+ /* overlap will only change when shrinking an image, which isn't
+ * safe while the image is in use. Thus, we don't need to keep
+ * snap_lock or parent_lock held. Dropping them before taking
+ * cache_lock avoids a lock ordering issue.
+ */
+ ictx->refresh_overlap(overlap);
return 0;
}
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 51fad699555..d01e5a5f29c 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -795,6 +795,7 @@ void ObjectCacher::bh_write(BufferHead *bh)
bh->start(), bh->length(),
bh->snapc, bh->bl, bh->last_write,
bh->ob->truncate_size, bh->ob->truncate_seq,
+ bh->ob->oset->rbd_image_overlap,
oncommit);
ldout(cct, 20) << " tid " << tid << " on " << bh->ob->get_oid() << dendl;
@@ -831,7 +832,7 @@ void ObjectCacher::bh_write_commit(int64_t poolid, sobject_t oid, loff_t start,
ldout(cct, 10) << "bh_write_commit marking exists on " << *ob << dendl;
ob->exists = true;
- if (writeback_handler.may_copy_on_write(ob->get_oid(), start, length, ob->get_snap())) {
+ if (writeback_handler.may_copy_on_write(ob->get_oid(), start, length, ob->get_snap(), ob->oset->rbd_image_overlap)) {
ldout(cct, 10) << "bh_write_commit may copy on write, clearing complete on " << *ob << dendl;
ob->complete = false;
}
@@ -1035,7 +1036,7 @@ int ObjectCacher::_readx(OSDRead *rd, ObjectSet *oset, Context *onfinish,
ldout(cct, 10) << "readx object !exists, 1 extent..." << dendl;
// should we worry about COW underneaeth us?
- if (writeback_handler.may_copy_on_write(soid.oid, ex_it->offset, ex_it->length, soid.snap)) {
+ if (writeback_handler.may_copy_on_write(soid.oid, ex_it->offset, ex_it->length, soid.snap, oset->rbd_image_overlap)) {
ldout(cct, 20) << "readx may copy on write" << dendl;
bool wait = false;
for (map<loff_t, BufferHead*>::iterator bh_it = o->data.begin();
diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h
index ac2833b9ea6..c4fad75d312 100644
--- a/src/osdc/ObjectCacher.h
+++ b/src/osdc/ObjectCacher.h
@@ -172,7 +172,7 @@ class ObjectCacher {
xlist<Object*>::item set_item;
object_locator_t oloc;
uint64_t truncate_size, truncate_seq;
-
+
bool complete;
bool exists;
@@ -305,18 +305,23 @@ class ObjectCacher {
struct ObjectSet {
void *parent;
+ // only for cephfs
inodeno_t ino;
uint64_t truncate_seq, truncate_size;
+ // only for rbd
+ uint64_t rbd_image_overlap;
+
int64_t poolid;
xlist<Object*> objects;
int dirty_or_tx;
bool return_enoent;
- ObjectSet(void *p, int64_t _poolid, inodeno_t i)
+ ObjectSet(void *p, int64_t _poolid, inodeno_t i, uint64_t overlap)
: parent(p), ino(i), truncate_seq(0),
- truncate_size(0), poolid(_poolid), dirty_or_tx(0),
+ truncate_size(0), rbd_image_overlap(overlap),
+ poolid(_poolid), dirty_or_tx(0),
return_enoent(false) {}
};
diff --git a/src/osdc/WritebackHandler.h b/src/osdc/WritebackHandler.h
index 17e1f683bec..f816f1218c7 100644
--- a/src/osdc/WritebackHandler.h
+++ b/src/osdc/WritebackHandler.h
@@ -26,12 +26,15 @@ class WritebackHandler {
* @param read_off read offset
* @param read_len read length
* @param snapid read snapid
+ * @param image_overlap for rbd, the offset through which a child
+ * image overlaps its parent
*/
- virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid) = 0;
+ virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid, uint64_t image_overlap) = 0;
virtual tid_t write(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, const SnapContext& snapc,
const bufferlist &bl, utime_t mtime, uint64_t trunc_size,
- __u32 trunc_seq, Context *oncommit) = 0;
+ __u32 trunc_seq, uint64_t image_overlap,
+ Context *oncommit) = 0;
virtual tid_t lock(const object_t& oid, const object_locator_t& oloc, int op,
int flags, Context *onack, Context *oncommit) {
assert(0 == "this WritebackHandler does not support the lock operation");
diff --git a/src/test/osdc/FakeWriteback.cc b/src/test/osdc/FakeWriteback.cc
index b4cd35ea979..2fdc20299cc 100644
--- a/src/test/osdc/FakeWriteback.cc
+++ b/src/test/osdc/FakeWriteback.cc
@@ -74,6 +74,7 @@ tid_t FakeWriteback::write(const object_t& oid,
const SnapContext& snapc,
const bufferlist &bl, utime_t mtime,
uint64_t trunc_size, __u32 trunc_seq,
+ uint64_t image_overlap,
Context *oncommit)
{
C_Delay *wrapper = new C_Delay(m_cct, oncommit, m_lock, off, NULL, m_delay_ns);;
@@ -81,7 +82,7 @@ tid_t FakeWriteback::write(const object_t& oid,
return m_tid.inc();
}
-bool FakeWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t, snapid_t)
+bool FakeWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t, snapid_t, uint64_t)
{
return false;
}
diff --git a/src/test/osdc/FakeWriteback.h b/src/test/osdc/FakeWriteback.h
index e7d6dc16bb4..6dbb9164939 100644
--- a/src/test/osdc/FakeWriteback.h
+++ b/src/test/osdc/FakeWriteback.h
@@ -25,9 +25,10 @@ public:
virtual tid_t write(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, const SnapContext& snapc,
const bufferlist &bl, utime_t mtime, uint64_t trunc_size,
- __u32 trunc_seq, Context *oncommit);
+ __u32 trunc_seq, uint64_t image_overlap,
+ Context *oncommit);
- virtual bool may_copy_on_write(const object_t&, uint64_t, uint64_t, snapid_t);
+ virtual bool may_copy_on_write(const object_t&, uint64_t, uint64_t, snapid_t, uint64_t);
private:
CephContext *m_cct;
Mutex *m_lock;
diff --git a/src/test/osdc/object_cacher_stress.cc b/src/test/osdc/object_cacher_stress.cc
index ee71b0e898c..15580e87d00 100644
--- a/src/test/osdc/object_cacher_stress.cc
+++ b/src/test/osdc/object_cacher_stress.cc
@@ -68,7 +68,7 @@ int stress_test(uint64_t num_ops, uint64_t num_objs,
atomic_t outstanding_reads;
vector<std::tr1::shared_ptr<op_data> > ops;
- ObjectCacher::ObjectSet object_set(NULL, 0, 0);
+ ObjectCacher::ObjectSet object_set(NULL, 0, 0, 0);
SnapContext snapc;
ceph::buffer::ptr bp(max_op_len);
ceph::bufferlist bl;