summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-08-27 15:25:50 -0700
committerSage Weil <sage@inktank.com>2013-08-30 16:57:25 -0700
commitb0a30a55eba6d646a04591fc038d2b1fce86f936 (patch)
tree2265d16f9059628d0cee4e77aaad030f9986006d
parent8d74f417eac3cfcfb0b99fdf0734e210447798ae (diff)
downloadceph-b0a30a55eba6d646a04591fc038d2b1fce86f936.tar.gz
osd: COPY_GET operation
Add new rados operation to copy all user-visible content for an object in a simple, safe way. Use a new object_copy_cursor_t to keep track of our position. Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/include/ceph_strings.cc2
-rw-r--r--src/include/encoding.h11
-rw-r--r--src/include/rados.h5
-rw-r--r--src/osd/ReplicatedPG.cc71
-rw-r--r--src/osd/osd_types.cc52
-rw-r--r--src/osd/osd_types.h31
-rw-r--r--src/osdc/Objecter.h76
-rw-r--r--src/test/encoding/types.h1
8 files changed, 249 insertions, 0 deletions
diff --git a/src/include/ceph_strings.cc b/src/include/ceph_strings.cc
index d46eca6aaf8..f14f29ce0e9 100644
--- a/src/include/ceph_strings.cc
+++ b/src/include/ceph_strings.cc
@@ -48,6 +48,8 @@ const char *ceph_osd_op_name(int op)
case CEPH_OSD_OP_TMAPPUT: return "tmapput";
case CEPH_OSD_OP_WATCH: return "watch";
+ case CEPH_OSD_OP_COPY_GET: return "copy-get";
+
case CEPH_OSD_OP_CLONERANGE: return "clonerange";
case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version";
case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr";
diff --git a/src/include/encoding.h b/src/include/encoding.h
index 67c9af59d2b..a091f7f69e9 100644
--- a/src/include/encoding.h
+++ b/src/include/encoding.h
@@ -562,6 +562,17 @@ inline void decode(std::map<T,U>& m, bufferlist::iterator& p)
}
}
template<class T, class U>
+inline void decode_noclear(std::map<T,U>& m, bufferlist::iterator& p)
+{
+ __u32 n;
+ decode(n, p);
+ while (n--) {
+ T k;
+ decode(k, p);
+ decode(m[k], p);
+ }
+}
+template<class T, class U>
inline void encode_nohead(const std::map<T,U>& m, bufferlist& bl)
{
for (typename std::map<T,U>::const_iterator p = m.begin(); p != m.end(); ++p) {
diff --git a/src/include/rados.h b/src/include/rados.h
index 9037606d154..27291a7440e 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -217,6 +217,8 @@ enum {
CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
+ CEPH_OSD_OP_COPY_GET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 27,
+
/** multi **/
CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
@@ -405,6 +407,9 @@ struct ceph_osd_op {
__le64 offset, length;
__le64 src_offset;
} __attribute__ ((packed)) clonerange;
+ struct {
+ __le64 max; /* max data in reply */
+ } __attribute__ ((packed)) copy_get;
};
__le32 payload_len;
} __attribute__ ((packed));
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 7d0811c4d96..ccef49a81b9 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -3361,6 +3361,77 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
break;
+ case CEPH_OSD_OP_COPY_GET:
+ ++ctx->num_read;
+ {
+ object_copy_cursor_t cursor;
+ uint64_t out_max;
+ try {
+ ::decode(cursor, bp);
+ ::decode(out_max, bp);
+ }
+ catch (buffer::error& e) {
+ result = -EINVAL;
+ goto fail;
+ }
+
+ // size, mtime
+ ::encode(oi.size, osd_op.outdata);
+ ::encode(oi.mtime, osd_op.outdata);
+
+ // attrs
+ map<string,bufferptr> out_attrs;
+ if (!cursor.attr_complete) {
+ result = osd->store->getattrs(coll, soid, out_attrs, true);
+ if (result < 0)
+ break;
+ cursor.attr_complete = true;
+ }
+ ::encode(out_attrs, osd_op.outdata);
+
+ int64_t left = out_max - osd_op.outdata.length();
+
+ // data
+ bufferlist bl;
+ if (left > 0 && !cursor.data_complete) {
+ if (cursor.data_offset < oi.size) {
+ result = osd->store->read(coll, oi.soid, cursor.data_offset, out_max, bl);
+ if (result < 0)
+ return result;
+ assert(result <= left);
+ left -= result;
+ cursor.data_offset += result;
+ }
+ if (cursor.data_offset == oi.size)
+ cursor.data_complete = true;
+ }
+ ::encode(bl, osd_op.outdata);
+
+ // omap
+ std::map<std::string,bufferlist> out_omap;
+ if (left > 0 && !cursor.omap_complete) {
+ ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(coll, oi.soid);
+ assert(iter);
+ if (iter->valid()) {
+ iter->upper_bound(cursor.omap_offset);
+ for (; left > 0 && iter->valid(); iter->next()) {
+ out_omap.insert(make_pair(iter->key(), iter->value()));
+ left -= iter->key().length() + 4 + iter->value().length() + 4;
+ }
+ }
+ if (iter->valid()) {
+ cursor.omap_offset = iter->key();
+ } else {
+ cursor.omap_complete = true;
+ }
+ }
+ ::encode(out_omap, osd_op.outdata);
+
+ ::encode(cursor, osd_op.outdata);
+ result = 0;
+ }
+ break;
+
default:
dout(1) << "unrecognized osd op " << op.op
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 6511d802952..1c1b457002c 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -2422,6 +2422,55 @@ void pg_missing_t::split_into(
}
}
+// -- object_copy_cursor_t --
+
+void object_copy_cursor_t::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ::encode(attr_complete, bl);
+ ::encode(data_offset, bl);
+ ::encode(data_complete, bl);
+ ::encode(omap_offset, bl);
+ ::encode(omap_complete, bl);
+ ENCODE_FINISH(bl);
+}
+
+void object_copy_cursor_t::decode(bufferlist::iterator &bl)
+{
+ DECODE_START(1, bl);
+ ::decode(attr_complete, bl);
+ ::decode(data_offset, bl);
+ ::decode(data_complete, bl);
+ ::decode(omap_offset, bl);
+ ::decode(omap_complete, bl);
+ DECODE_FINISH(bl);
+}
+
+void object_copy_cursor_t::dump(Formatter *f) const
+{
+ f->dump_unsigned("attr_complete", (int)attr_complete);
+ f->dump_unsigned("data_offset", data_offset);
+ f->dump_unsigned("data_complete", (int)data_complete);
+ f->dump_string("omap_offset", omap_offset);
+ f->dump_unsigned("omap_complete", (int)omap_complete);
+}
+
+void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& o)
+{
+ o.push_back(new object_copy_cursor_t);
+ o.push_back(new object_copy_cursor_t);
+ o.back()->attr_complete = true;
+ o.back()->data_offset = 123;
+ o.push_back(new object_copy_cursor_t);
+ o.back()->attr_complete = true;
+ o.back()->data_complete = true;
+ o.back()->omap_offset = "foo";
+ o.push_back(new object_copy_cursor_t);
+ o.back()->attr_complete = true;
+ o.back()->data_complete = true;
+ o.back()->omap_complete = true;
+}
+
// -- pg_create_t --
void pg_create_t::encode(bufferlist &bl) const
@@ -3433,6 +3482,9 @@ ostream& operator<<(ostream& out, const OSDOp& op)
out << (op.op.watch.flag ? " add":" remove")
<< " cookie " << op.op.watch.cookie << " ver " << op.op.watch.ver;
break;
+ case CEPH_OSD_OP_COPY_GET:
+ out << " max " << op.op.copy_get.max;
+ break;
default:
out << " " << op.op.extent.offset << "~" << op.op.extent.length;
if (op.op.extent.truncate_seq)
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 3eb14246cc5..00e9409c98a 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -1816,6 +1816,37 @@ struct pg_ls_response_t {
WRITE_CLASS_ENCODER(pg_ls_response_t)
+/**
+ * object_copy_cursor_t
+ */
+struct object_copy_cursor_t {
+ bool attr_complete;
+ uint64_t data_offset;
+ bool data_complete;
+ string omap_offset;
+ bool omap_complete;
+
+ object_copy_cursor_t()
+ : attr_complete(false),
+ data_offset(0),
+ data_complete(false),
+ omap_complete(false)
+ {}
+
+ bool is_initial() const {
+ return !attr_complete && data_offset == 0 && omap_offset.empty();
+ }
+ bool is_complete() const {
+ return attr_complete && data_complete && omap_complete;
+ }
+
+ static void generate_test_instances(list<object_copy_cursor_t*>& o);
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(object_copy_cursor_t)
+
/**
* pg creation info
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index f4ef5b76291..91f62551729 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -567,6 +567,82 @@ struct ObjectOperation {
}
}
+ struct C_ObjectOperation_copyget : public Context {
+ bufferlist bl;
+ object_copy_cursor_t *cursor;
+ uint64_t *out_size;
+ utime_t *out_mtime;
+ std::map<std::string,bufferlist> *out_attrs;
+ bufferlist *out_data;
+ std::map<std::string,bufferlist> *out_omap;
+ int *prval;
+ C_ObjectOperation_copyget(object_copy_cursor_t *c,
+ uint64_t *s,
+ utime_t *m,
+ std::map<std::string,bufferlist> *a,
+ bufferlist *d,
+ std::map<std::string,bufferlist> *o,
+ int *r)
+ : cursor(c),
+ out_size(s), out_mtime(m), out_attrs(a),
+ out_data(d), out_omap(o), prval(r) {}
+ void finish(int r) {
+ if (r < 0)
+ return;
+ try {
+ bufferlist::iterator p = bl.begin();
+ uint64_t size;
+ ::decode(size, p);
+ if (out_size)
+ *out_size = size;
+ utime_t mtime;
+ ::decode(mtime, p);
+ if (out_mtime)
+ *out_mtime = mtime;
+ if (out_attrs) {
+ ::decode_noclear(*out_attrs, p);
+ } else {
+ std::map<std::string,bufferlist> t;
+ ::decode(t, p);
+ }
+ bufferlist bl;
+ ::decode(bl, p);
+ if (out_data)
+ out_data->claim_append(bl);
+ if (out_omap) {
+ ::decode_noclear(*out_omap, p);
+ } else {
+ std::map<std::string,bufferlist> t;
+ ::decode(t, p);
+ }
+ ::decode(*cursor, p);
+ } catch (buffer::error& e) {
+ if (prval)
+ *prval = -EIO;
+ }
+ }
+ };
+
+ void copy_get(object_copy_cursor_t *cursor,
+ uint64_t max,
+ uint64_t *out_size,
+ utime_t *out_mtime,
+ std::map<std::string,bufferlist> *out_attrs,
+ bufferlist *out_data,
+ std::map<std::string,bufferlist> *out_omap,
+ int *prval) {
+ OSDOp& osd_op = add_op(CEPH_OSD_OP_COPY_GET);
+ osd_op.op.copy_get.max = max;
+ ::encode(*cursor, osd_op.indata);
+ ::encode(max, osd_op.indata);
+ unsigned p = ops.size() - 1;
+ out_rval[p] = prval;
+ C_ObjectOperation_copyget *h =
+ new C_ObjectOperation_copyget(cursor, out_size, out_mtime, out_attrs, out_data, out_omap, prval);
+ out_bl[p] = &h->bl;
+ out_handler[p] = h;
+ }
+
void omap_get_header(bufferlist *bl, int *prval) {
add_op(CEPH_OSD_OP_OMAPGETHEADER);
unsigned p = ops.size() - 1;
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index 213da6fcccc..a6f7cfb7883 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -53,6 +53,7 @@ TYPE(pg_log_t)
TYPE(pg_missing_t::item)
TYPE(pg_missing_t)
TYPE(pg_ls_response_t)
+TYPE(object_copy_cursor_t)
TYPE(pg_create_t)
TYPE(watch_info_t)
TYPE(object_info_t)