diff options
author | Josh Durgin <josh.durgin@inktank.com> | 2013-07-24 18:14:29 -0700 |
---|---|---|
committer | Josh Durgin <josh.durgin@inktank.com> | 2013-07-25 18:44:10 -0700 |
commit | 02ec1a7502fe0aa612be6c088c4a0efda022da98 (patch) | |
tree | beb6af50eeba386ba72339c01651face7acefbec | |
parent | a4b9707e5b5d38b1e8929f52c5f71d5f9e519ceb (diff) | |
download | ceph-02ec1a7502fe0aa612be6c088c4a0efda022da98.tar.gz |
ObjectCacher: store image_overlap in ObjectSet
Pass the image_overlap to the WritebackHandler functions that need it,
may_copy_on_write() and write(), so that they don't need to take any
locks to read the current overlap. This is a step towards changing
the lock ordering in librbd so the cache lock may be taken inside
of the snap and parent locks.
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r-- | src/client/Inode.h | 2 | ||||
-rw-r--r-- | src/client/ObjecterWriteback.h | 5 | ||||
-rw-r--r-- | src/librbd/ImageCtx.cc | 11 | ||||
-rw-r--r-- | src/librbd/ImageCtx.h | 1 | ||||
-rw-r--r-- | src/librbd/LibrbdWriteback.cc | 19 | ||||
-rw-r--r-- | src/librbd/LibrbdWriteback.h | 5 | ||||
-rw-r--r-- | src/librbd/internal.cc | 35 | ||||
-rw-r--r-- | src/osdc/ObjectCacher.cc | 5 | ||||
-rw-r--r-- | src/osdc/ObjectCacher.h | 11 | ||||
-rw-r--r-- | src/osdc/WritebackHandler.h | 7 | ||||
-rw-r--r-- | src/test/osdc/FakeWriteback.cc | 3 | ||||
-rw-r--r-- | src/test/osdc/FakeWriteback.h | 5 | ||||
-rw-r--r-- | src/test/osdc/object_cacher_stress.cc | 2 |
13 files changed, 69 insertions, 42 deletions
diff --git a/src/client/Inode.h b/src/client/Inode.h index af4830b5c77..96450c7b85c 100644 --- a/src/client/Inode.h +++ b/src/client/Inode.h @@ -214,7 +214,7 @@ class Inode { exporting_issued(0), exporting_mds(-1), exporting_mseq(0), cap_item(this), flushing_cap_item(this), last_flush_tid(0), snaprealm(0), snaprealm_item(this), snapdir_parent(0), - oset((void *)this, newlayout->fl_pg_pool, ino), + oset((void *)this, newlayout->fl_pg_pool, ino, 0), reported_size(0), wanted_max_size(0), requested_max_size(0), _ref(0), ll_ref(0), dir(0), dn_set() diff --git a/src/client/ObjecterWriteback.h b/src/client/ObjecterWriteback.h index 9a10fb48a06..9fd4de26d62 100644 --- a/src/client/ObjecterWriteback.h +++ b/src/client/ObjecterWriteback.h @@ -19,14 +19,15 @@ class ObjecterWriteback : public WritebackHandler { trunc_size, trunc_seq, onfinish); } - virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid) { + virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid, uint64_t image_overlap) { return false; } virtual tid_t write(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, uint64_t trunc_size, - __u32 trunc_seq, Context *oncommit) { + __u32 trunc_seq, uint64_t image_overlap, + Context *oncommit) { return m_objecter->write_trunc(oid, oloc, off, len, snapc, bl, mtime, 0, trunc_size, trunc_seq, NULL, oncommit); } diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 41518b67698..8ade3de7539 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -91,7 +91,7 @@ namespace librbd { cct->_conf->rbd_cache_target_dirty, cct->_conf->rbd_cache_max_dirty_age, cct->_conf->rbd_cache_block_writes_upfront); - object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0); + object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0, 0); object_set->return_enoent = true; object_cacher->start(); } @@ -470,6 +470,15 @@ namespace librbd { return 0; } + void ImageCtx::refresh_overlap(uint64_t overlap) + { + if (!object_cacher) + return; + cache_lock.Lock(); + object_set->rbd_image_overlap = overlap; + cache_lock.Unlock(); + } + void ImageCtx::aio_read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off, Context *onfinish) { snap_lock.get_read(); diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 29ca2f197ea..cf8290edc8a 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -127,6 +127,7 @@ namespace librbd { uint64_t get_parent_snap_id(librados::snap_t in_snap_id) const; int get_parent_overlap(librados::snap_t in_snap_id, uint64_t *overlap) const; + void refresh_overlap(uint64_t overlap); void aio_read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off, Context *onfinish); void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off, diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc index 737f1f33cd7..b8f4b536e9d 100644 --- a/src/librbd/LibrbdWriteback.cc +++ b/src/librbd/LibrbdWriteback.cc @@ -109,13 +109,8 @@ namespace librbd { assert(r >= 0); } - bool LibrbdWriteback::may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid) + bool LibrbdWriteback::may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid, uint64_t image_overlap) { - m_ictx->parent_lock.get_read(); - uint64_t overlap = 0; - m_ictx->get_parent_overlap(snapid, &overlap); - m_ictx->parent_lock.put_read(); - uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix); // reverse map this object extent onto the parent @@ -123,7 +118,8 @@ namespace librbd { Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, object_no, 0, m_ictx->layout.fl_object_size, objectx); - uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap); + uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, + image_overlap); bool may = object_overlap > 0; ldout(m_ictx->cct, 10) << "may_copy_on_write " << oid << " " << read_off << "~" << read_len << " = " << may << dendl; return may; @@ -135,13 +131,9 @@ namespace librbd { const SnapContext& snapc, const bufferlist &bl, utime_t mtime, uint64_t trunc_size, __u32 trunc_seq, + uint64_t image_overlap, Context *oncommit) { - m_ictx->parent_lock.get_read(); - uint64_t overlap = 0; - m_ictx->get_parent_overlap(LIBRADOS_SNAP_HEAD, &overlap); - m_ictx->parent_lock.put_read(); - uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix); // reverse map this object extent onto the parent @@ -149,7 +141,8 @@ namespace librbd { Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, object_no, 0, m_ictx->layout.fl_object_size, objectx); - uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap); + uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, + image_overlap); write_result_d *result = new write_result_d(oid.name, oncommit); m_writes[oid.name].push(result); ldout(m_ictx->cct, 20) << "write will wait for result " << result << dendl; diff --git a/src/librbd/LibrbdWriteback.h b/src/librbd/LibrbdWriteback.h index ba8ff1f114d..8d7d558d308 100644 --- a/src/librbd/LibrbdWriteback.h +++ b/src/librbd/LibrbdWriteback.h @@ -29,13 +29,14 @@ namespace librbd { Context *onfinish); // Determine whether a read to this extent could be affected by a write-triggered copy-on-write - virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid); + virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid, uint64_t image_overlap); // Note that oloc, trunc_size, and trunc_seq are ignored virtual tid_t write(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, uint64_t trunc_size, - __u32 trunc_seq, Context *oncommit); + __u32 trunc_seq, uint64_t image_overlap, + Context *oncommit); struct write_result_d { bool done; diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 1fd7942f3b8..bf8a0afc062 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2000,19 +2000,30 @@ reprotect_and_return_err: int _snap_set(ImageCtx *ictx, const char *snap_name) { - RWLock::WLocker l1(ictx->snap_lock); - RWLock::WLocker l2(ictx->parent_lock); - int r; - if ((snap_name != NULL) && (strlen(snap_name) != 0)) { - r = ictx->snap_set(snap_name); - } else { - ictx->snap_unset(); - r = 0; - } - if (r < 0) { - return r; + uint64_t overlap = 0; + + { + RWLock::WLocker l1(ictx->snap_lock); + RWLock::WLocker l2(ictx->parent_lock); + int r; + if ((snap_name != NULL) && (strlen(snap_name) != 0)) { + r = ictx->snap_set(snap_name); + } else { + ictx->snap_unset(); + r = 0; + } + if (r < 0) { + return r; + } + refresh_parent(ictx); + assert(0 == ictx->get_parent_overlap(ictx->snap_id, &overlap)); } - refresh_parent(ictx); + /* overlap will only change when shrinking an image, which isn't + * safe while the image is in use. Thus, we don't need to keep + * snap_lock or parent_lock held. Dropping them before taking + * cache_lock avoids a lock ordering issue. + */ + ictx->refresh_overlap(overlap); return 0; } diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc index 51fad699555..d01e5a5f29c 100644 --- a/src/osdc/ObjectCacher.cc +++ b/src/osdc/ObjectCacher.cc @@ -795,6 +795,7 @@ void ObjectCacher::bh_write(BufferHead *bh) bh->start(), bh->length(), bh->snapc, bh->bl, bh->last_write, bh->ob->truncate_size, bh->ob->truncate_seq, + bh->ob->oset->rbd_image_overlap, oncommit); ldout(cct, 20) << " tid " << tid << " on " << bh->ob->get_oid() << dendl; @@ -831,7 +832,7 @@ void ObjectCacher::bh_write_commit(int64_t poolid, sobject_t oid, loff_t start, ldout(cct, 10) << "bh_write_commit marking exists on " << *ob << dendl; ob->exists = true; - if (writeback_handler.may_copy_on_write(ob->get_oid(), start, length, ob->get_snap())) { + if (writeback_handler.may_copy_on_write(ob->get_oid(), start, length, ob->get_snap(), ob->oset->rbd_image_overlap)) { ldout(cct, 10) << "bh_write_commit may copy on write, clearing complete on " << *ob << dendl; ob->complete = false; } @@ -1035,7 +1036,7 @@ int ObjectCacher::_readx(OSDRead *rd, ObjectSet *oset, Context *onfinish, ldout(cct, 10) << "readx object !exists, 1 extent..." << dendl; // should we worry about COW underneaeth us? - if (writeback_handler.may_copy_on_write(soid.oid, ex_it->offset, ex_it->length, soid.snap)) { + if (writeback_handler.may_copy_on_write(soid.oid, ex_it->offset, ex_it->length, soid.snap, oset->rbd_image_overlap)) { ldout(cct, 20) << "readx may copy on write" << dendl; bool wait = false; for (map<loff_t, BufferHead*>::iterator bh_it = o->data.begin(); diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h index ac2833b9ea6..c4fad75d312 100644 --- a/src/osdc/ObjectCacher.h +++ b/src/osdc/ObjectCacher.h @@ -172,7 +172,7 @@ class ObjectCacher { xlist<Object*>::item set_item; object_locator_t oloc; uint64_t truncate_size, truncate_seq; - + bool complete; bool exists; @@ -305,18 +305,23 @@ class ObjectCacher { struct ObjectSet { void *parent; + // only for cephfs inodeno_t ino; uint64_t truncate_seq, truncate_size; + // only for rbd + uint64_t rbd_image_overlap; + int64_t poolid; xlist<Object*> objects; int dirty_or_tx; bool return_enoent; - ObjectSet(void *p, int64_t _poolid, inodeno_t i) + ObjectSet(void *p, int64_t _poolid, inodeno_t i, uint64_t overlap) : parent(p), ino(i), truncate_seq(0), - truncate_size(0), poolid(_poolid), dirty_or_tx(0), + truncate_size(0), rbd_image_overlap(overlap), + poolid(_poolid), dirty_or_tx(0), return_enoent(false) {} }; diff --git a/src/osdc/WritebackHandler.h b/src/osdc/WritebackHandler.h index 17e1f683bec..f816f1218c7 100644 --- a/src/osdc/WritebackHandler.h +++ b/src/osdc/WritebackHandler.h @@ -26,12 +26,15 @@ class WritebackHandler { * @param read_off read offset * @param read_len read length * @param snapid read snapid + * @param image_overlap for rbd, the offset through which a child + * image overlaps its parent */ - virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid) = 0; + virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid, uint64_t image_overlap) = 0; virtual tid_t write(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, uint64_t trunc_size, - __u32 trunc_seq, Context *oncommit) = 0; + __u32 trunc_seq, uint64_t image_overlap, + Context *oncommit) = 0; virtual tid_t lock(const object_t& oid, const object_locator_t& oloc, int op, int flags, Context *onack, Context *oncommit) { assert(0 == "this WritebackHandler does not support the lock operation"); diff --git a/src/test/osdc/FakeWriteback.cc b/src/test/osdc/FakeWriteback.cc index b4cd35ea979..2fdc20299cc 100644 --- a/src/test/osdc/FakeWriteback.cc +++ b/src/test/osdc/FakeWriteback.cc @@ -74,6 +74,7 @@ tid_t FakeWriteback::write(const object_t& oid, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, uint64_t trunc_size, __u32 trunc_seq, + uint64_t image_overlap, Context *oncommit) { C_Delay *wrapper = new C_Delay(m_cct, oncommit, m_lock, off, NULL, m_delay_ns);; @@ -81,7 +82,7 @@ tid_t FakeWriteback::write(const object_t& oid, return m_tid.inc(); } -bool FakeWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t, snapid_t) +bool FakeWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t, snapid_t, uint64_t) { return false; } diff --git a/src/test/osdc/FakeWriteback.h b/src/test/osdc/FakeWriteback.h index e7d6dc16bb4..6dbb9164939 100644 --- a/src/test/osdc/FakeWriteback.h +++ b/src/test/osdc/FakeWriteback.h @@ -25,9 +25,10 @@ public: virtual tid_t write(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, uint64_t trunc_size, - __u32 trunc_seq, Context *oncommit); + __u32 trunc_seq, uint64_t image_overlap, + Context *oncommit); - virtual bool may_copy_on_write(const object_t&, uint64_t, uint64_t, snapid_t); + virtual bool may_copy_on_write(const object_t&, uint64_t, uint64_t, snapid_t, uint64_t); private: CephContext *m_cct; Mutex *m_lock; diff --git a/src/test/osdc/object_cacher_stress.cc b/src/test/osdc/object_cacher_stress.cc index ee71b0e898c..15580e87d00 100644 --- a/src/test/osdc/object_cacher_stress.cc +++ b/src/test/osdc/object_cacher_stress.cc @@ -68,7 +68,7 @@ int stress_test(uint64_t num_ops, uint64_t num_objs, atomic_t outstanding_reads; vector<std::tr1::shared_ptr<op_data> > ops; - ObjectCacher::ObjectSet object_set(NULL, 0, 0); + ObjectCacher::ObjectSet object_set(NULL, 0, 0, 0); SnapContext snapc; ceph::buffer::ptr bp(max_op_len); ceph::bufferlist bl; |