diff options
author | Sage Weil <sage@inktank.com> | 2013-10-01 17:15:25 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-10-01 17:15:25 -0700 |
commit | 1db0a572c193d207162da54afe645b7850d2cc2c (patch) | |
tree | bfd894a5a87e399f060c11b3dab5e90d1f9b8cfb | |
parent | f97772aa03da5d4125207ef39d5a80dfe3087cbb (diff) | |
parent | 0d610926d7179240a8268923631ee9dbdbca6eeb (diff) | |
download | ceph-1db0a572c193d207162da54afe645b7850d2cc2c.tar.gz |
Merge pull request #675 from ceph/wip-osd-dirty
osd: add a dirty flag for objects.
Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- | src/common/ceph_strings.cc | 2 | ||||
-rw-r--r-- | src/include/rados.h | 2 | ||||
-rw-r--r-- | src/include/rados/librados.hpp | 15 | ||||
-rw-r--r-- | src/librados/librados.cc | 14 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 32 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.h | 3 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 8 | ||||
-rw-r--r-- | src/osd/osd_types.h | 24 | ||||
-rw-r--r-- | src/osdc/Objecter.h | 42 | ||||
-rw-r--r-- | src/test/librados/misc.cc | 54 |
10 files changed, 186 insertions, 10 deletions
diff --git a/src/common/ceph_strings.cc b/src/common/ceph_strings.cc index cd08083967a..47648ce19b3 100644 --- a/src/common/ceph_strings.cc +++ b/src/common/ceph_strings.cc @@ -50,6 +50,8 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_COPY_GET: return "copy-get"; case CEPH_OSD_OP_COPY_FROM: return "copy-from"; + case CEPH_OSD_OP_UNDIRTY: return "undirty"; + case CEPH_OSD_OP_ISDIRTY: return "isdirty"; case CEPH_OSD_OP_CLONERANGE: return "clonerange"; case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version"; diff --git a/src/include/rados.h b/src/include/rados.h index 178c171c445..e7a32b5afef 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -219,6 +219,8 @@ enum { CEPH_OSD_OP_COPY_FROM = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 26, CEPH_OSD_OP_COPY_GET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 27, + CEPH_OSD_OP_UNDIRTY = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 28, + CEPH_OSD_OP_ISDIRTY = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 29, /** multi **/ CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp index 358142c8cb4..3f6d025ff41 100644 --- a/src/include/rados/librados.hpp +++ b/src/include/rados/librados.hpp @@ -283,6 +283,13 @@ namespace librados */ void copy_from(const std::string& src, const IoCtx& src_ioctx, uint64_t src_version); + /** + * undirty an object + * + * Clear an object's dirty flag + */ + void undirty(); + friend class IoCtx; }; @@ -401,6 +408,14 @@ namespace librados */ void list_snaps(snap_set_t *out_snaps, int *prval); + /** + * query dirty state of an object + * + * @param isdirty [out] pointer to resulting bool + * @param prval [out] place error code in prval upon completion + */ + void is_dirty(bool *isdirty, int *prval); + }; /* IoCtx : This is a context in which we can perform I/O. 
diff --git a/src/librados/librados.cc b/src/librados/librados.cc index 63092d1093d..217a0a7bfb2 100644 --- a/src/librados/librados.cc +++ b/src/librados/librados.cc @@ -269,6 +269,14 @@ void librados::ObjectReadOperation::list_snaps( o->list_snaps(out_snaps, prval); } +void librados::ObjectReadOperation::is_dirty(bool *is_dirty, int *prval) +{ + ::ObjectOperation *o = (::ObjectOperation *)impl; + o->is_dirty(is_dirty, prval); +} + + + int librados::IoCtx::omap_get_vals(const std::string& oid, const std::string& start_after, const std::string& filter_prefix, @@ -390,6 +398,12 @@ void librados::ObjectWriteOperation::copy_from(const std::string& src, o->copy_from(object_t(src), src_ioctx.io_ctx_impl->snap_seq, src_ioctx.io_ctx_impl->oloc, src_version); } +void librados::ObjectWriteOperation::undirty() +{ + ::ObjectOperation *o = (::ObjectOperation *)impl; + o->undirty(); +} + void librados::ObjectWriteOperation::tmap_put(const bufferlist &bl) { ::ObjectOperation *o = (::ObjectOperation *)impl; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 960305bd21e..004b5193e4b 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -2634,6 +2634,25 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } break; + case CEPH_OSD_OP_ISDIRTY: + ++ctx->num_read; + { + bool is_dirty = obs.oi.is_dirty(); + ::encode(is_dirty, osd_op.outdata); + ctx->delta_stats.num_rd++; + result = 0; + } + break; + + case CEPH_OSD_OP_UNDIRTY: + ++ctx->num_write; + { + ctx->undirty = true; // see make_writeable() + ctx->modify = true; + ctx->delta_stats.num_wr++; + } + break; + case CEPH_OSD_OP_GETXATTR: ++ctx->num_read; { @@ -2749,8 +2768,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) result = -ERANGE; else if (ver > oi.user_version) result = -EOVERFLOW; - break; } + break; case CEPH_OSD_OP_LIST_WATCHERS: ++ctx->num_read; @@ -3058,7 +3077,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } } break; - + case 
CEPH_OSD_OP_TRIMTRUNC: op.extent.offset = op.extent.truncate_size; // falling through @@ -3987,6 +4006,15 @@ void ReplicatedPG::make_writeable(OpContext *ctx) dout(20) << "make_writeable " << soid << " snapset=" << ctx->snapset << " snapc=" << snapc << dendl;; + // we will mark the object dirty + if (ctx->undirty) { + dout(20) << " clearing DIRTY flag" << dendl; + ctx->new_obs.oi.clear_flag(object_info_t::FLAG_DIRTY); + } else { + dout(20) << " setting DIRTY flag" << dendl; + ctx->new_obs.oi.set_flag(object_info_t::FLAG_DIRTY); + } + // use newer snapc? if (ctx->new_snapset.seq > snapc.seq) { snapc.seq = ctx->new_snapset.seq; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index abee57ffe7d..4b91a2135dc 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -254,6 +254,7 @@ public: bool modify; // (force) modification (even if op_t is empty) bool user_modify; // user-visible modification + bool undirty; // user explicitly un-dirtying this object // side effects list<watch_info_t> watch_connects; @@ -308,7 +309,7 @@ public: ReplicatedPG *_pg) : op(_op), reqid(_reqid), ops(_ops), obs(_obs), snapset(0), new_obs(_obs->oi, _obs->exists), - modify(false), user_modify(false), + modify(false), user_modify(false), undirty(false), bytes_written(0), bytes_read(0), user_at_version(0), current_osd_subop_num(0), data_off(0), reply(NULL), pg(_pg), diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 1fecece9290..27f7b171677 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2968,10 +2968,8 @@ ostream& operator<<(ostream& out, const object_info_t& oi) out << " wrlock_by=" << oi.wrlock_by; else out << " " << oi.snaps; - if (oi.is_lost()) - out << " LOST"; - if (oi.is_whiteout()) - out << " WHITEOUT"; + if (oi.flags) + out << " " << oi.get_flag_string(); out << ")"; return out; } @@ -3525,6 +3523,8 @@ ostream& operator<<(ostream& out, const OSDOp& op) case CEPH_OSD_OP_DELETE: case CEPH_OSD_OP_LIST_WATCHERS: case 
CEPH_OSD_OP_LIST_SNAPS: + case CEPH_OSD_OP_UNDIRTY: + case CEPH_OSD_OP_ISDIRTY: break; case CEPH_OSD_OP_ASSERT_VER: out << " v" << op.op.assert_ver.ver; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 9d440e25bde..72ee32c4cc5 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2099,17 +2099,36 @@ struct object_info_t { typedef enum { FLAG_LOST = 1<<0, FLAG_WHITEOUT = 1<<1, // object logically does not exist + FLAG_DIRTY = 1<<2, // object has been modified since last flushed or undirtied // ... FLAG_USES_TMAP = 1<<8, } flag_t; + flag_t flags; + static string get_flag_string(flag_t flags) { + string s; + if (flags & FLAG_LOST) + s += "|lost"; + if (flags & FLAG_WHITEOUT) + s += "|whiteout"; + if (flags & FLAG_DIRTY) + s += "|dirty"; + if (flags & FLAG_USES_TMAP) + s += "|uses_tmap"; + if (s.length()) + return s.substr(1); + return s; + } + string get_flag_string() const { + return get_flag_string(flags); + } + osd_reqid_t wrlock_by; // [head] vector<snapid_t> snaps; // [clone] uint64_t truncate_seq, truncate_size; - map<pair<uint64_t, entity_name_t>, watch_info_t> watchers; void copy_user_bits(const object_info_t& other); @@ -2132,6 +2151,9 @@ struct object_info_t { bool is_whiteout() const { return test_flag(FLAG_WHITEOUT); } + bool is_dirty() const { + return test_flag(FLAG_DIRTY); + } void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index eef226ad9b2..1196633276d 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -617,9 +617,10 @@ struct ObjectOperation { } ::decode(*cursor, p); } catch (buffer::error& e) { - if (prval) - *prval = -EIO; + r = -EIO; } + if (prval) + *prval = r; } }; @@ -643,6 +644,43 @@ struct ObjectOperation { out_handler[p] = h; } + void undirty() { + add_op(CEPH_OSD_OP_UNDIRTY); + } + + struct C_ObjectOperation_isdirty : public Context { + bufferlist bl; + bool *pisdirty; + int *prval; + C_ObjectOperation_isdirty(bool *p, int *r) + 
: pisdirty(p), prval(r) {} + void finish(int r) { + if (r < 0) + return; + try { + bufferlist::iterator p = bl.begin(); + bool isdirty; + ::decode(isdirty, p); + if (pisdirty) + *pisdirty = isdirty; + } catch (buffer::error& e) { + r = -EIO; + } + if (prval) + *prval = r; + } + }; + + void is_dirty(bool *pisdirty, int *prval) { + add_op(CEPH_OSD_OP_ISDIRTY); + unsigned p = ops.size() - 1; + out_rval[p] = prval; + C_ObjectOperation_isdirty *h = + new C_ObjectOperation_isdirty(pisdirty, prval); + out_bl[p] = &h->bl; + out_handler[p] = h; + } + void omap_get_header(bufferlist *bl, int *prval) { add_op(CEPH_OSD_OP_OMAPGETHEADER); unsigned p = ops.size() - 1; diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc index 20847e7b8b9..9abac9c412a 100644 --- a/src/test/librados/misc.cc +++ b/src/test/librados/misc.cc @@ -647,6 +647,60 @@ TEST(LibRadosMisc, CopyPP) { ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster)); } +TEST(LibRadosMisc, Dirty) { + Rados cluster; + std::string pool_name = get_temp_pool_name(); + ASSERT_EQ("", create_one_pool_pp(pool_name, cluster)); + IoCtx ioctx; + ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx)); + + { + ObjectWriteOperation op; + op.create(true); + ASSERT_EQ(0, ioctx.operate("foo", &op)); + } + { + bool dirty = false; + int r = -1; + ObjectReadOperation op; + op.is_dirty(&dirty, &r); + ASSERT_EQ(0, ioctx.operate("foo", &op, NULL)); + ASSERT_TRUE(dirty); + ASSERT_EQ(0, r); + } + { + ObjectWriteOperation op; + op.undirty(); + ASSERT_EQ(0, ioctx.operate("foo", &op)); + } + { + bool dirty = false; + int r = -1; + ObjectReadOperation op; + op.is_dirty(&dirty, &r); + ASSERT_EQ(0, ioctx.operate("foo", &op, NULL)); + ASSERT_FALSE(dirty); + ASSERT_EQ(0, r); + } + { + ObjectWriteOperation op; + op.truncate(0); // still a write even tho it is a no-op + ASSERT_EQ(0, ioctx.operate("foo", &op)); + } + { + bool dirty = false; + int r = -1; + ObjectReadOperation op; + op.is_dirty(&dirty, &r); + ASSERT_EQ(0, 
ioctx.operate("foo", &op, NULL)); + ASSERT_TRUE(dirty); + ASSERT_EQ(0, r); + } + + ioctx.close(); + ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster)); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); |