summary refs log tree commit diff
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2013-10-01 17:15:25 -0700
committer Sage Weil <sage@inktank.com> 2013-10-01 17:15:25 -0700
commit 1db0a572c193d207162da54afe645b7850d2cc2c (patch)
tree bfd894a5a87e399f060c11b3dab5e90d1f9b8cfb
parent f97772aa03da5d4125207ef39d5a80dfe3087cbb (diff)
parent 0d610926d7179240a8268923631ee9dbdbca6eeb (diff)
download ceph-1db0a572c193d207162da54afe645b7850d2cc2c.tar.gz
Merge pull request #675 from ceph/wip-osd-dirty
osd: add a dirty flag for objects. Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- src/common/ceph_strings.cc 2
-rw-r--r-- src/include/rados.h 2
-rw-r--r-- src/include/rados/librados.hpp 15
-rw-r--r-- src/librados/librados.cc 14
-rw-r--r-- src/osd/ReplicatedPG.cc 32
-rw-r--r-- src/osd/ReplicatedPG.h 3
-rw-r--r-- src/osd/osd_types.cc 8
-rw-r--r-- src/osd/osd_types.h 24
-rw-r--r-- src/osdc/Objecter.h 42
-rw-r--r-- src/test/librados/misc.cc 54
10 files changed, 186 insertions, 10 deletions
diff --git a/src/common/ceph_strings.cc b/src/common/ceph_strings.cc
index cd08083967a..47648ce19b3 100644
--- a/src/common/ceph_strings.cc
+++ b/src/common/ceph_strings.cc
@@ -50,6 +50,8 @@ const char *ceph_osd_op_name(int op)
case CEPH_OSD_OP_COPY_GET: return "copy-get";
case CEPH_OSD_OP_COPY_FROM: return "copy-from";
+ case CEPH_OSD_OP_UNDIRTY: return "undirty";
+ case CEPH_OSD_OP_ISDIRTY: return "isdirty";
case CEPH_OSD_OP_CLONERANGE: return "clonerange";
case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version";
diff --git a/src/include/rados.h b/src/include/rados.h
index 178c171c445..e7a32b5afef 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -219,6 +219,8 @@ enum {
CEPH_OSD_OP_COPY_FROM = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 26,
CEPH_OSD_OP_COPY_GET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 27,
+ CEPH_OSD_OP_UNDIRTY = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 28,
+ CEPH_OSD_OP_ISDIRTY = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 29,
/** multi **/
CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp
index 358142c8cb4..3f6d025ff41 100644
--- a/src/include/rados/librados.hpp
+++ b/src/include/rados/librados.hpp
@@ -283,6 +283,13 @@ namespace librados
*/
void copy_from(const std::string& src, const IoCtx& src_ioctx, uint64_t src_version);
+ /**
+ * undirty an object
+ *
+ * Clear an object's dirty flag
+ */
+ void undirty();
+
friend class IoCtx;
};
@@ -401,6 +408,14 @@ namespace librados
*/
void list_snaps(snap_set_t *out_snaps, int *prval);
+ /**
+ * query dirty state of an object
+ *
+ * @param isdirty [out] pointer to resulting bool
+ * @param prval [out] place error code in prval upon completion
+ */
+ void is_dirty(bool *isdirty, int *prval);
+
};
/* IoCtx : This is a context in which we can perform I/O.
diff --git a/src/librados/librados.cc b/src/librados/librados.cc
index 63092d1093d..217a0a7bfb2 100644
--- a/src/librados/librados.cc
+++ b/src/librados/librados.cc
@@ -269,6 +269,14 @@ void librados::ObjectReadOperation::list_snaps(
o->list_snaps(out_snaps, prval);
}
+void librados::ObjectReadOperation::is_dirty(bool *is_dirty, int *prval)
+{
+ ::ObjectOperation *o = (::ObjectOperation *)impl;
+ o->is_dirty(is_dirty, prval);
+}
+
+
+
int librados::IoCtx::omap_get_vals(const std::string& oid,
const std::string& start_after,
const std::string& filter_prefix,
@@ -390,6 +398,12 @@ void librados::ObjectWriteOperation::copy_from(const std::string& src,
o->copy_from(object_t(src), src_ioctx.io_ctx_impl->snap_seq, src_ioctx.io_ctx_impl->oloc, src_version);
}
+void librados::ObjectWriteOperation::undirty()
+{
+ ::ObjectOperation *o = (::ObjectOperation *)impl;
+ o->undirty();
+}
+
void librados::ObjectWriteOperation::tmap_put(const bufferlist &bl)
{
::ObjectOperation *o = (::ObjectOperation *)impl;
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 960305bd21e..004b5193e4b 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -2634,6 +2634,25 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
break;
+ case CEPH_OSD_OP_ISDIRTY:
+ ++ctx->num_read;
+ {
+ bool is_dirty = obs.oi.is_dirty();
+ ::encode(is_dirty, osd_op.outdata);
+ ctx->delta_stats.num_rd++;
+ result = 0;
+ }
+ break;
+
+ case CEPH_OSD_OP_UNDIRTY:
+ ++ctx->num_write;
+ {
+ ctx->undirty = true; // see make_writeable()
+ ctx->modify = true;
+ ctx->delta_stats.num_wr++;
+ }
+ break;
+
case CEPH_OSD_OP_GETXATTR:
++ctx->num_read;
{
@@ -2749,8 +2768,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = -ERANGE;
else if (ver > oi.user_version)
result = -EOVERFLOW;
- break;
}
+ break;
case CEPH_OSD_OP_LIST_WATCHERS:
++ctx->num_read;
@@ -3058,7 +3077,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
}
break;
-
+
case CEPH_OSD_OP_TRIMTRUNC:
op.extent.offset = op.extent.truncate_size;
// falling through
@@ -3987,6 +4006,15 @@ void ReplicatedPG::make_writeable(OpContext *ctx)
dout(20) << "make_writeable " << soid << " snapset=" << ctx->snapset
<< " snapc=" << snapc << dendl;;
+ // update the DIRTY flag: cleared on an explicit undirty, set otherwise
+ if (ctx->undirty) {
+ dout(20) << " clearing DIRTY flag" << dendl;
+ ctx->new_obs.oi.clear_flag(object_info_t::FLAG_DIRTY);
+ } else {
+ dout(20) << " setting DIRTY flag" << dendl;
+ ctx->new_obs.oi.set_flag(object_info_t::FLAG_DIRTY);
+ }
+
// use newer snapc?
if (ctx->new_snapset.seq > snapc.seq) {
snapc.seq = ctx->new_snapset.seq;
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index abee57ffe7d..4b91a2135dc 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -254,6 +254,7 @@ public:
bool modify; // (force) modification (even if op_t is empty)
bool user_modify; // user-visible modification
+ bool undirty; // user explicitly un-dirtying this object
// side effects
list<watch_info_t> watch_connects;
@@ -308,7 +309,7 @@ public:
ReplicatedPG *_pg) :
op(_op), reqid(_reqid), ops(_ops), obs(_obs), snapset(0),
new_obs(_obs->oi, _obs->exists),
- modify(false), user_modify(false),
+ modify(false), user_modify(false), undirty(false),
bytes_written(0), bytes_read(0), user_at_version(0),
current_osd_subop_num(0),
data_off(0), reply(NULL), pg(_pg),
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 1fecece9290..27f7b171677 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -2968,10 +2968,8 @@ ostream& operator<<(ostream& out, const object_info_t& oi)
out << " wrlock_by=" << oi.wrlock_by;
else
out << " " << oi.snaps;
- if (oi.is_lost())
- out << " LOST";
- if (oi.is_whiteout())
- out << " WHITEOUT";
+ if (oi.flags)
+ out << " " << oi.get_flag_string();
out << ")";
return out;
}
@@ -3525,6 +3523,8 @@ ostream& operator<<(ostream& out, const OSDOp& op)
case CEPH_OSD_OP_DELETE:
case CEPH_OSD_OP_LIST_WATCHERS:
case CEPH_OSD_OP_LIST_SNAPS:
+ case CEPH_OSD_OP_UNDIRTY:
+ case CEPH_OSD_OP_ISDIRTY:
break;
case CEPH_OSD_OP_ASSERT_VER:
out << " v" << op.op.assert_ver.ver;
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 9d440e25bde..72ee32c4cc5 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -2099,17 +2099,36 @@ struct object_info_t {
typedef enum {
FLAG_LOST = 1<<0,
FLAG_WHITEOUT = 1<<1, // object logically does not exist
+ FLAG_DIRTY = 1<<2, // object has been modified since last flushed or undirtied
// ...
FLAG_USES_TMAP = 1<<8,
} flag_t;
+
flag_t flags;
+ static string get_flag_string(flag_t flags) {
+ string s;
+ if (flags & FLAG_LOST)
+ s += "|lost";
+ if (flags & FLAG_WHITEOUT)
+ s += "|whiteout";
+ if (flags & FLAG_DIRTY)
+ s += "|dirty";
+ if (flags & FLAG_USES_TMAP)
+ s += "|uses_tmap";
+ if (s.length())
+ return s.substr(1);
+ return s;
+ }
+ string get_flag_string() const {
+ return get_flag_string(flags);
+ }
+
osd_reqid_t wrlock_by; // [head]
vector<snapid_t> snaps; // [clone]
uint64_t truncate_seq, truncate_size;
-
map<pair<uint64_t, entity_name_t>, watch_info_t> watchers;
void copy_user_bits(const object_info_t& other);
@@ -2132,6 +2151,9 @@ struct object_info_t {
bool is_whiteout() const {
return test_flag(FLAG_WHITEOUT);
}
+ bool is_dirty() const {
+ return test_flag(FLAG_DIRTY);
+ }
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index eef226ad9b2..1196633276d 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -617,9 +617,10 @@ struct ObjectOperation {
}
::decode(*cursor, p);
} catch (buffer::error& e) {
- if (prval)
- *prval = -EIO;
+ r = -EIO;
}
+ if (prval)
+ *prval = r;
}
};
@@ -643,6 +644,43 @@ struct ObjectOperation {
out_handler[p] = h;
}
+ void undirty() {
+ add_op(CEPH_OSD_OP_UNDIRTY);
+ }
+
+ struct C_ObjectOperation_isdirty : public Context {
+ bufferlist bl;
+ bool *pisdirty;
+ int *prval;
+ C_ObjectOperation_isdirty(bool *p, int *r)
+ : pisdirty(p), prval(r) {}
+ void finish(int r) {
+ if (r < 0)
+ return;
+ try {
+ bufferlist::iterator p = bl.begin();
+ bool isdirty;
+ ::decode(isdirty, p);
+ if (pisdirty)
+ *pisdirty = isdirty;
+ } catch (buffer::error& e) {
+ r = -EIO;
+ }
+ if (prval)
+ *prval = r;
+ }
+ };
+
+ void is_dirty(bool *pisdirty, int *prval) {
+ add_op(CEPH_OSD_OP_ISDIRTY);
+ unsigned p = ops.size() - 1;
+ out_rval[p] = prval;
+ C_ObjectOperation_isdirty *h =
+ new C_ObjectOperation_isdirty(pisdirty, prval);
+ out_bl[p] = &h->bl;
+ out_handler[p] = h;
+ }
+
void omap_get_header(bufferlist *bl, int *prval) {
add_op(CEPH_OSD_OP_OMAPGETHEADER);
unsigned p = ops.size() - 1;
diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc
index 20847e7b8b9..9abac9c412a 100644
--- a/src/test/librados/misc.cc
+++ b/src/test/librados/misc.cc
@@ -647,6 +647,60 @@ TEST(LibRadosMisc, CopyPP) {
ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
}
+TEST(LibRadosMisc, Dirty) {
+ Rados cluster;
+ std::string pool_name = get_temp_pool_name();
+ ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
+ IoCtx ioctx;
+ ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
+
+ {
+ ObjectWriteOperation op;
+ op.create(true);
+ ASSERT_EQ(0, ioctx.operate("foo", &op));
+ }
+ {
+ bool dirty = false;
+ int r = -1;
+ ObjectReadOperation op;
+ op.is_dirty(&dirty, &r);
+ ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
+ ASSERT_TRUE(dirty);
+ ASSERT_EQ(0, r);
+ }
+ {
+ ObjectWriteOperation op;
+ op.undirty();
+ ASSERT_EQ(0, ioctx.operate("foo", &op));
+ }
+ {
+ bool dirty = false;
+ int r = -1;
+ ObjectReadOperation op;
+ op.is_dirty(&dirty, &r);
+ ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
+ ASSERT_FALSE(dirty);
+ ASSERT_EQ(0, r);
+ }
+ {
+ ObjectWriteOperation op;
+ op.truncate(0); // still a write even tho it is a no-op
+ ASSERT_EQ(0, ioctx.operate("foo", &op));
+ }
+ {
+ bool dirty = false;
+ int r = -1;
+ ObjectReadOperation op;
+ op.is_dirty(&dirty, &r);
+ ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
+ ASSERT_TRUE(dirty);
+ ASSERT_EQ(0, r);
+ }
+
+ ioctx.close();
+ ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+}
+
int main(int argc, char **argv)
{
::testing::InitGoogleTest(&argc, argv);