summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-08-30 15:24:41 -0700
committerSage Weil <sage@inktank.com>2013-08-30 15:24:41 -0700
commitb882aa2ace54099a1b5c2ce5b25ac29e29b9ec14 (patch)
tree318be2387f4914acb533f9756b0a96cd0bb724a1
parentb30a1b288996c2f7a6471f38c13030e6047052a2 (diff)
parent13aac48f25f25dd8286b9d3148f99a66b44bd962 (diff)
downloadceph-b882aa2ace54099a1b5c2ce5b25ac29e29b9ec14.tar.gz
Merge pull request #560 from ceph/wip-6032-cache-objecter
Wip 6032 cache objecter Reviewed-by: Sage Weil <sage@inktank.com>
-rwxr-xr-xqa/workunits/rados/caching_redirects.sh59
-rw-r--r--src/osd/osd_types.h1
-rw-r--r--src/osdc/Objecter.cc22
-rw-r--r--src/osdc/Objecter.h9
4 files changed, 82 insertions, 9 deletions
diff --git a/qa/workunits/rados/caching_redirects.sh b/qa/workunits/rados/caching_redirects.sh
new file mode 100755
index 00000000000..a8eda487246
--- /dev/null
+++ b/qa/workunits/rados/caching_redirects.sh
@@ -0,0 +1,59 @@
+#!/bin/bash -x
+
+set -e
+
+expect_false()  # runs "$@" and inverts its status: returns 0 if the command fails, 1 if it succeeds
+{
+ set -x
+ if "$@"; then return 1; else return 0; fi
+}
+
+
+#create pools, set up tier relationship
+ceph osd pool create base_pool 2
+ceph osd pool create empty_cache 2
+ceph osd pool create data_cache 2
+ceph osd tier add base_pool empty_cache
+ceph osd tier add base_pool data_cache
+
+# populate base_pool and data_cache with some data
+echo "foo" > foo.txt
+echo "bar" > bar.txt
+echo "baz" > baz.txt
+rados -p base_pool put fooobj foo.txt
+rados -p base_pool put barobj bar.txt
+# data_cache holds the contents swapped (fooobj=bar.txt, barobj=foo.txt) so we can tell when a read was served from it
+rados -p data_cache put fooobj bar.txt
+rados -p data_cache put barobj foo.txt
+
+# get the objects back before setting a caching pool
+rados -p base_pool get fooobj tmp.txt
+diff -q tmp.txt foo.txt
+rados -p base_pool get barobj tmp.txt
+diff -q tmp.txt bar.txt
+
+# overlay empty_cache on base_pool and make sure reads now fail (the objects are not in the cache pool)
+ceph osd tier set-overlay base_pool empty_cache
+expect_false rados -p base_pool get fooobj tmp.txt
+expect_false rados -p base_pool get barobj tmp.txt
+# writes issued against base_pool should be redirected to the overlay pool as well
+rados -p base_pool put fooobj baz.txt
+rados -p base_pool put barobj baz.txt
+#and make sure we can look at the cache pool directly
+rados -p empty_cache get fooobj tmp.txt
+diff -q tmp.txt baz.txt
+
+# switch cache pools and make sure contents differ
+ceph osd tier remove-overlay base_pool
+ceph osd tier set-overlay base_pool data_cache
+rados -p base_pool get fooobj tmp.txt
+diff -q tmp.txt bar.txt
+rados -p base_pool get barobj tmp.txt
+diff -q tmp.txt foo.txt
+
+# drop the cache entirely and make sure contents are still the same
+ceph osd tier remove-overlay base_pool
+rados -p base_pool get fooobj tmp.txt
+diff -q tmp.txt foo.txt
+rados -p base_pool get barobj tmp.txt
+diff -q tmp.txt bar.txt
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index f3a307e3040..3eb14246cc5 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -805,6 +805,7 @@ public:
set<uint64_t> tiers; ///< pools that are tiers of us
int64_t tier_of; ///< pool for which we are a tier
+ // Note: for ops flagged as both read and write, write_tier takes precedence over read_tier
int64_t read_tier; ///< pool/tier for objecter to direct reads to
int64_t write_tier; ///< pool/tier for objecter to direct writes to
cache_mode_t cache_mode; ///< cache pool mode
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 9fb0bfa446d..39378521b09 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -1243,7 +1243,7 @@ tid_t Objecter::_op_submit(Op *op)
// send?
ldout(cct, 10) << "op_submit oid " << op->oid
- << " " << op->oloc
+ << " " << op->base_oloc << " " << op->target_oloc
<< " " << op->ops << " tid " << op->tid
<< " osd." << (op->session ? op->session->osd : -1)
<< dendl;
@@ -1297,12 +1297,23 @@ int Objecter::recalc_op_target(Op *op)
{
vector<int> acting;
pg_t pgid = op->pgid;
+
+ bool is_read = op->flags & CEPH_OSD_FLAG_READ;
+ bool is_write = op->flags & CEPH_OSD_FLAG_WRITE;
+
+ op->target_oloc = op->base_oloc;
+ if (is_read && osdmap->get_pg_pool(op->base_oloc.pool)->has_read_tier())
+ op->target_oloc.pool = osdmap->get_pg_pool(op->base_oloc.pool)->read_tier;
+ if (is_write && osdmap->get_pg_pool(op->base_oloc.pool)->has_write_tier())
+ op->target_oloc.pool = osdmap->get_pg_pool(op->base_oloc.pool)->write_tier;
+
if (op->precalc_pgid) {
+ assert(op->oid.name.empty()); // make sure this is a listing op
ldout(cct, 10) << "recalc_op_target have " << pgid << " pool " << osdmap->have_pg_pool(pgid.pool()) << dendl;
if (!osdmap->have_pg_pool(pgid.pool()))
return RECALC_OP_TARGET_POOL_DNE;
} else {
- int ret = osdmap->object_locator_to_pg(op->oid, op->oloc, pgid);
+ int ret = osdmap->object_locator_to_pg(op->oid, op->target_oloc, pgid);
if (ret == -ENOENT)
return RECALC_OP_TARGET_POOL_DNE;
}
@@ -1318,7 +1329,7 @@ int Objecter::recalc_op_target(Op *op)
op->used_replica = false;
if (!acting.empty()) {
int osd;
- bool read = (op->flags & CEPH_OSD_FLAG_READ) && (op->flags & CEPH_OSD_FLAG_WRITE) == 0;
+ bool read = is_read && !is_write;
if (read && (op->flags & CEPH_OSD_FLAG_BALANCE_READS)) {
int p = rand() % acting.size();
if (p)
@@ -1444,7 +1455,7 @@ void Objecter::send_op(Op *op)
op->stamp = ceph_clock_now(cct);
MOSDOp *m = new MOSDOp(client_inc, op->tid,
- op->oid, op->oloc, op->pgid, osdmap->get_epoch(),
+ op->oid, op->target_oloc, op->pgid, osdmap->get_epoch(),
flags);
m->set_snapid(op->snapid);
@@ -2210,7 +2221,8 @@ void Objecter::dump_ops(Formatter *fmt) const
fmt->dump_stream("last_sent") << op->stamp;
fmt->dump_int("attempts", op->attempts);
fmt->dump_stream("object_id") << op->oid;
- fmt->dump_stream("object_locator") << op->oloc;
+ fmt->dump_stream("object_locator") << op->base_oloc;
+ fmt->dump_stream("target_object_locator") << op->target_oloc;
fmt->dump_stream("snapid") << op->snapid;
fmt->dump_stream("snap_context") << op->snapc;
fmt->dump_stream("mtime") << op->mtime;
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 7041ab984f7..be756054497 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -745,7 +745,8 @@ public:
int incarnation;
object_t oid;
- object_locator_t oloc;
+ object_locator_t base_oloc;
+ object_locator_t target_oloc;
pg_t pgid;
vector<int> acting;
@@ -789,7 +790,7 @@ public:
Op(const object_t& o, const object_locator_t& ol, vector<OSDOp>& op,
int f, Context *ac, Context *co, version_t *ov) :
session(NULL), session_item(this), incarnation(0),
- oid(o), oloc(ol),
+ oid(o), base_oloc(ol),
used_replica(false), con(NULL),
snapid(CEPH_NOSNAP),
outbl(NULL),
@@ -811,8 +812,8 @@ public:
out_rval[i] = NULL;
}
- if (oloc.key == o)
- oloc.key.clear();
+ if (base_oloc.key == o)
+ base_oloc.key.clear();
}
~Op() {
while (!out_handler.empty()) {