diff options
author | Sage Weil <sage@inktank.com> | 2013-08-30 15:24:41 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-08-30 15:24:41 -0700 |
commit | b882aa2ace54099a1b5c2ce5b25ac29e29b9ec14 (patch) | |
tree | 318be2387f4914acb533f9756b0a96cd0bb724a1 | |
parent | b30a1b288996c2f7a6471f38c13030e6047052a2 (diff) | |
parent | 13aac48f25f25dd8286b9d3148f99a66b44bd962 (diff) | |
download | ceph-b882aa2ace54099a1b5c2ce5b25ac29e29b9ec14.tar.gz |
Merge pull request #560 from ceph/wip-6032-cache-objecter
Wip 6032 cache objecter
Reviewed-by: Sage Weil <sage@inktank.com>
-rwxr-xr-x | qa/workunits/rados/caching_redirects.sh | 59 | ||||
-rw-r--r-- | src/osd/osd_types.h | 1 | ||||
-rw-r--r-- | src/osdc/Objecter.cc | 22 | ||||
-rw-r--r-- | src/osdc/Objecter.h | 9 |
4 files changed, 82 insertions, 9 deletions
diff --git a/qa/workunits/rados/caching_redirects.sh b/qa/workunits/rados/caching_redirects.sh
new file mode 100755
index 00000000000..a8eda487246
--- /dev/null
+++ b/qa/workunits/rados/caching_redirects.sh
@@ -0,0 +1,59 @@
+#!/bin/bash -x
+
+set -e
+
+expect_false()
+{
+	set -x
+	if "$@"; then return 1; else return 0; fi
+}
+
+
+#create pools, set up tier relationship
+ceph osd pool create base_pool 2
+ceph osd pool create empty_cache 2
+ceph osd pool create data_cache 2
+ceph osd tier add base_pool empty_cache
+ceph osd tier add base_pool data_cache
+
+# populate base_pool and data_cache with some data
+echo "foo" > foo.txt
+echo "bar" > bar.txt
+echo "baz" > baz.txt
+rados -p base_pool put fooobj foo.txt
+rados -p base_pool put barobj bar.txt
+# data_cache is backwards so we can tell we read from it
+rados -p data_cache put fooobj bar.txt
+rados -p data_cache put barobj foo.txt
+
+# get the objects back before setting a caching pool
+rados -p base_pool get fooobj tmp.txt
+diff -q tmp.txt foo.txt
+rados -p base_pool get barobj tmp.txt
+diff -q tmp.txt bar.txt
+
+# set up redirect and make sure we get nothing
+ceph osd tier set-overlay base_pool empty_cache
+expect_false rados -p base_pool get fooobj tmp.txt
+expect_false rados -p base_pool get barobj tmp.txt
+#let's write as well
+rados -p base_pool put fooobj baz.txt
+rados -p base_pool put barobj baz.txt
+#and make sure we can look at the cache pool directly
+rados -p empty_cache get fooobj tmp.txt
+diff -q tmp.txt baz.txt
+
+# switch cache pools and make sure contents differ
+ceph osd tier remove-overlay base_pool
+ceph osd tier set-overlay base_pool data_cache
+rados -p base_pool get fooobj tmp.txt
+diff -q tmp.txt bar.txt
+rados -p base_pool get barobj tmp.txt
+diff -q tmp.txt foo.txt
+
+# drop the cache entirely and make sure contents are still the same
+ceph osd tier remove-overlay base_pool
+rados -p base_pool get fooobj tmp.txt
+diff -q tmp.txt foo.txt
+rados -p base_pool get barobj tmp.txt
+diff -q tmp.txt bar.txt
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index f3a307e3040..3eb14246cc5 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -805,6 +805,7 @@ public:
 
   set<uint64_t> tiers; ///< pools that are tiers of us
   int64_t tier_of; ///< pool for which we are a tier
+  // Note that write wins for read+write ops
   int64_t read_tier; ///< pool/tier for objecter to direct reads to
   int64_t write_tier; ///< pool/tier for objecter to direct writes to
   cache_mode_t cache_mode; ///< cache pool mode
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 9fb0bfa446d..39378521b09 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -1243,7 +1243,7 @@ tid_t Objecter::_op_submit(Op *op)
 
   // send?
   ldout(cct, 10) << "op_submit oid " << op->oid
-           << " " << op->oloc
+           << " " << op->base_oloc << " " << op->target_oloc
            << " " << op->ops << " tid " << op->tid
            << " osd." << (op->session ? op->session->osd : -1)
            << dendl;
@@ -1297,12 +1297,23 @@ int Objecter::recalc_op_target(Op *op)
 {
   vector<int> acting;
   pg_t pgid = op->pgid;
+
+  bool is_read = op->flags & CEPH_OSD_FLAG_READ;
+  bool is_write = op->flags & CEPH_OSD_FLAG_WRITE;
+
+  op->target_oloc = op->base_oloc;
+  if (is_read && osdmap->get_pg_pool(op->base_oloc.pool)->has_read_tier())
+    op->target_oloc.pool = osdmap->get_pg_pool(op->base_oloc.pool)->read_tier;
+  if (is_write && osdmap->get_pg_pool(op->base_oloc.pool)->has_write_tier())
+    op->target_oloc.pool = osdmap->get_pg_pool(op->base_oloc.pool)->write_tier;
+
   if (op->precalc_pgid) {
+    assert(op->oid.name.empty()); // make sure this is a listing op
     ldout(cct, 10) << "recalc_op_target have " << pgid << " pool " << osdmap->have_pg_pool(pgid.pool()) << dendl;
     if (!osdmap->have_pg_pool(pgid.pool()))
       return RECALC_OP_TARGET_POOL_DNE;
   } else {
-    int ret = osdmap->object_locator_to_pg(op->oid, op->oloc, pgid);
+    int ret = osdmap->object_locator_to_pg(op->oid, op->target_oloc, pgid);
     if (ret == -ENOENT)
       return RECALC_OP_TARGET_POOL_DNE;
   }
@@ -1318,7 +1329,7 @@ int Objecter::recalc_op_target(Op *op)
   op->used_replica = false;
   if (!acting.empty()) {
     int osd;
-    bool read = (op->flags & CEPH_OSD_FLAG_READ) && (op->flags & CEPH_OSD_FLAG_WRITE) == 0;
+    bool read = is_read && !is_write;
     if (read && (op->flags & CEPH_OSD_FLAG_BALANCE_READS)) {
       int p = rand() % acting.size();
       if (p)
@@ -1444,7 +1455,7 @@ void Objecter::send_op(Op *op)
   op->stamp = ceph_clock_now(cct);
 
   MOSDOp *m = new MOSDOp(client_inc, op->tid,
-                         op->oid, op->oloc, op->pgid, osdmap->get_epoch(),
+                         op->oid, op->target_oloc, op->pgid, osdmap->get_epoch(),
                          flags);
 
   m->set_snapid(op->snapid);
@@ -2210,7 +2221,8 @@ void Objecter::dump_ops(Formatter *fmt) const
     fmt->dump_stream("last_sent") << op->stamp;
     fmt->dump_int("attempts", op->attempts);
     fmt->dump_stream("object_id") << op->oid;
-    fmt->dump_stream("object_locator") << op->oloc;
+    fmt->dump_stream("object_locator") << op->base_oloc;
+    fmt->dump_stream("target_object_locator") << op->target_oloc;
     fmt->dump_stream("snapid") << op->snapid;
     fmt->dump_stream("snap_context") << op->snapc;
     fmt->dump_stream("mtime") << op->mtime;
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 7041ab984f7..be756054497 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -745,7 +745,8 @@ public:
     int incarnation;
 
     object_t oid;
-    object_locator_t oloc;
+    object_locator_t base_oloc;
+    object_locator_t target_oloc;
     pg_t pgid;
     vector<int> acting;
 
@@ -789,7 +790,7 @@ public:
     Op(const object_t& o, const object_locator_t& ol, vector<OSDOp>& op,
        int f, Context *ac, Context *co, version_t *ov) :
       session(NULL), session_item(this), incarnation(0),
-      oid(o), oloc(ol),
+      oid(o), base_oloc(ol),
       used_replica(false), con(NULL),
       snapid(CEPH_NOSNAP),
       outbl(NULL),
@@ -811,8 +812,8 @@ public:
         out_rval[i] = NULL;
       }
 
-      if (oloc.key == o)
-        oloc.key.clear();
+      if (base_oloc.key == o)
+        base_oloc.key.clear();
     }
     ~Op() {
       while (!out_handler.empty()) {