author    Greg Farnum <gregory.farnum@dreamhost.com>    2011-09-02 14:04:41 -0700
committer Greg Farnum <gregory.farnum@dreamhost.com>    2011-09-02 14:04:41 -0700
commit    2982f6768c1f3be0bba6771c95aca7099de9f8e4 (patch)
tree      29bef8c13554312a49220873974ddf08dcdc7fda
parent    8b9ca2a5401516ca48d722a052c9380bde1cd666 (diff)
parent    22cc333c77df1de03b8fb465a068612808f529b8 (diff)
download  ceph-2982f6768c1f3be0bba6771c95aca7099de9f8e4.tar.gz
Merge branch 'wip-flock'
-rw-r--r--  src/Makefile.am    2
-rw-r--r--  src/mds/flock.cc   488
-rw-r--r--  src/mds/flock.h    542
3 files changed, 557 insertions, 475 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index e53b68def91..3e9fa587142 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -885,6 +885,7 @@ libmds_a_SOURCES = \
mds/Dumper.cc \
mds/Resetter.cc \
mds/MDS.cc \
+ mds/flock.cc \
mds/locks.c \
mds/journal.cc \
mds/Server.cc \
@@ -1118,7 +1119,6 @@ noinst_HEADERS = \
include/rbd/librbd.h\
include/rbd/librbd.hpp\
logrotate.conf\
- mds/flock.h\
mds/inode_backtrace.h\
mds/locks.c\
mds/locks.h\
diff --git a/src/mds/flock.cc b/src/mds/flock.cc
new file mode 100644
index 00000000000..b97aa013118
--- /dev/null
+++ b/src/mds/flock.cc
@@ -0,0 +1,488 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include <errno.h>
+
+#include "common/debug.h"
+#include "mdstypes.h"
+#include "mds/flock.h"
+
+bool ceph_lock_state_t::is_waiting(ceph_filelock &fl)
+{
+ multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+ while (p != waiting_locks.end()) {
+ if (p->second.start > fl.start)
+ return false;
+ if (p->second.length == fl.length &&
+ p->second.client == fl.client &&
+ p->second.pid == fl.pid &&
+ p->second.pid_namespace == fl.pid_namespace)
+ return true;
+ ++p;
+ }
+ return false;
+}
+
+void ceph_lock_state_t::remove_waiting(ceph_filelock& fl)
+{
+ multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+ while (p != waiting_locks.end()) {
+ if (p->second.start > fl.start)
+ return;
+ if (p->second.length == fl.length &&
+ p->second.client == fl.client &&
+ p->second.pid == fl.pid &&
+ p->second.pid_namespace == fl.pid_namespace) {
+ waiting_locks.erase(p);
+ return;
+ }
+ ++p;
+ }
+}
+
+bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock, bool wait_on_fail)
+{
+ dout(15) << "add_lock " << new_lock << dendl;
+ bool ret = false;
+ list<multimap<uint64_t, ceph_filelock>::iterator>
+ overlapping_locks, self_overlapping_locks, neighbor_locks;
+
+ // first, get any overlapping locks and split them into owned-by-us and not
+ if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
+ dout(15) << "got overlapping lock, splitting by owner" << dendl;
+ split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
+ }
+ if (!overlapping_locks.empty()) { //overlapping locks owned by others :(
+ if (CEPH_LOCK_EXCL == new_lock.type) {
+ //can't set, we want an exclusive
+ dout(15) << "overlapping lock, and this lock is exclusive, can't set"
+ << dendl;
+ if (wait_on_fail) {
+ waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+ }
+ } else { //shared lock, check for any exclusive locks blocking us
+ if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
+ dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
+ if (wait_on_fail) {
+ waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+ }
+ } else {
+ //yay, we can insert a shared lock
+ dout(15) << "inserting shared lock" << dendl;
+ adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
+ held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+ ret = true;
+ }
+ }
+ } else { //no overlapping locks except our own
+ adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
+ dout(15) << "no conflicts, inserting " << new_lock << dendl;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (new_lock.start, new_lock));
+ ret = true;
+ }
+ if (ret)
+ ++client_held_lock_counts[(client_t)new_lock.client];
+ else if (wait_on_fail)
+ ++client_waiting_lock_counts[(client_t)new_lock.client];
+ return ret;
+}
+
+void ceph_lock_state_t::look_for_lock(ceph_filelock& testing_lock)
+{
+ list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
+ self_overlapping_locks;
+ if (get_overlapping_locks(testing_lock, overlapping_locks)) {
+ split_by_owner(testing_lock, overlapping_locks, self_overlapping_locks);
+ }
+ if (!overlapping_locks.empty()) { //somebody else owns overlapping lock
+ if (CEPH_LOCK_EXCL == testing_lock.type) { //any lock blocks it
+ testing_lock = (*overlapping_locks.begin())->second;
+ } else {
+ ceph_filelock *blocking_lock;
+ if ((blocking_lock = contains_exclusive_lock(overlapping_locks))) {
+ testing_lock = *blocking_lock;
+ } else { //nothing blocking!
+ testing_lock.type = CEPH_LOCK_UNLOCK;
+ }
+ }
+ return;
+ }
+ //if we get here, only our own locks block
+ testing_lock.type = CEPH_LOCK_UNLOCK;
+}
+
+void ceph_lock_state_t::remove_lock(ceph_filelock removal_lock,
+ list<ceph_filelock>& activated_locks)
+{
+ list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
+ self_overlapping_locks, crossed_waiting_locks;
+ if (get_overlapping_locks(removal_lock, overlapping_locks)) {
+ dout(15) << "splitting by owner" << dendl;
+ split_by_owner(removal_lock, overlapping_locks, self_overlapping_locks);
+ } else dout(15) << "attempt to remove lock at " << removal_lock.start
+ << " but no locks there!" << dendl;
+ bool remove_to_end = (0 == removal_lock.length);
+ bool old_lock_to_end;
+ uint64_t removal_start = removal_lock.start;
+ uint64_t removal_end = removal_start + removal_lock.length - 1;
+ uint64_t old_lock_end;
+ __s64 old_lock_client = 0;
+ ceph_filelock *old_lock;
+
+ dout(15) << "examining " << self_overlapping_locks.size()
+ << " self-overlapping locks for removal" << dendl;
+ for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = self_overlapping_locks.begin();
+ iter != self_overlapping_locks.end();
+ ++iter) {
+ dout(15) << "self overlapping lock " << (*iter)->second << dendl;
+ old_lock = &(*iter)->second;
+ old_lock_to_end = (0 == old_lock->length);
+ old_lock_end = old_lock->start + old_lock->length - 1;
+ old_lock_client = old_lock->client;
+ if (remove_to_end) {
+ if (old_lock->start < removal_start) {
+ old_lock->length = removal_start - old_lock->start;
+ } else {
+ dout(15) << "erasing " << (*iter)->second << dendl;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ } else if (old_lock_to_end) {
+ ceph_filelock append_lock = *old_lock;
+ append_lock.start = removal_end+1;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (append_lock.start, append_lock));
+ ++client_held_lock_counts[(client_t)old_lock->client];
+ if (old_lock->start >= removal_start) {
+ dout(15) << "erasing " << (*iter)->second << dendl;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ } else old_lock->length = removal_start - old_lock->start;
+ } else {
+ if (old_lock_end > removal_end) {
+ ceph_filelock append_lock = *old_lock;
+ append_lock.start = removal_end + 1;
+ append_lock.length = old_lock_end - append_lock.start + 1;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (append_lock.start, append_lock));
+ ++client_held_lock_counts[(client_t)old_lock->client];
+ }
+ if (old_lock->start < removal_start) {
+ old_lock->length = removal_start - old_lock->start;
+ } else {
+ dout(15) << "erasing " << (*iter)->second << dendl;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ }
+ if (!client_held_lock_counts[old_lock_client]) {
+ client_held_lock_counts.erase(old_lock_client);
+ }
+ }
+}
+
+bool ceph_lock_state_t::remove_all_from (client_t client)
+{
+ bool cleared_any = false;
+ if (client_held_lock_counts.count(client)) {
+ remove_all_from(client, held_locks);
+ client_held_lock_counts.erase(client);
+ cleared_any = true;
+ }
+ if (client_waiting_lock_counts.count(client)) {
+ remove_all_from(client, waiting_locks);
+ client_waiting_lock_counts.erase(client);
+ }
+ return cleared_any;
+}
+
+void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::iterator> old_locks,
+ ceph_filelock& new_lock,
+ list<multimap<uint64_t, ceph_filelock>::iterator>
+ neighbor_locks)
+{
+ dout(15) << "adjust_locks" << dendl;
+ bool new_lock_to_end = (0 == new_lock.length);
+ bool old_lock_to_end;
+ uint64_t new_lock_start = new_lock.start;
+ uint64_t new_lock_end = new_lock.start + new_lock.length - 1;
+ uint64_t old_lock_start, old_lock_end;
+ __s64 old_lock_client = 0;
+ ceph_filelock *old_lock;
+ for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = old_locks.begin();
+ iter != old_locks.end();
+ ++iter) {
+ old_lock = &(*iter)->second;
+ dout(15) << "adjusting lock: " << *old_lock << dendl;
+ old_lock_to_end = (0 == old_lock->length);
+ old_lock_start = old_lock->start;
+ old_lock_end = old_lock->start + old_lock->length - 1;
+ new_lock_start = new_lock.start;
+ new_lock_end = new_lock.start + new_lock.length - 1;
+ old_lock_client = old_lock->client;
+ if (new_lock_to_end || old_lock_to_end) {
+ //special code path to deal with a length set at 0
+ dout(15) << "one lock extends forever" << dendl;
+ if (old_lock->type == new_lock.type) {
+ //just unify them in new lock, remove old lock
+ dout(15) << "same lock type, unifying" << dendl;
+ new_lock.start = (new_lock_start < old_lock_start) ? new_lock_start :
+ old_lock_start;
+ new_lock.length = 0;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ } else { //not same type, have to keep any remains of old lock around
+ dout(15) << "shrinking old lock" << dendl;
+ if (new_lock_to_end) {
+ if (old_lock_start < new_lock_start) {
+ old_lock->length = new_lock_start - old_lock_start;
+ } else {
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ } else { //old lock extends past end of new lock
+ ceph_filelock appended_lock = *old_lock;
+ appended_lock.start = new_lock_end + 1;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (appended_lock.start, appended_lock));
+ ++client_held_lock_counts[(client_t)old_lock->client];
+ if (old_lock_start < new_lock_start) {
+ old_lock->length = new_lock_start - old_lock_start;
+ } else {
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ }
+ }
+ } else {
+ if (old_lock->type == new_lock.type) { //just merge them!
+ dout(15) << "merging locks, they're the same type" << dendl;
+ new_lock.start = (old_lock_start < new_lock_start ) ? old_lock_start :
+ new_lock_start;
+ uint64_t new_end = (new_lock_end > old_lock_end) ? new_lock_end :
+ old_lock_end;
+ new_lock.length = new_end - new_lock.start + 1;
+ dout(15) << "erasing lock " << (*iter)->second << dendl;
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ } else { //we'll have to update sizes and maybe make new locks
+ dout(15) << "locks aren't same type, changing sizes" << dendl;
+ if (old_lock_end > new_lock_end) { //add extra lock after new_lock
+ ceph_filelock appended_lock = *old_lock;
+ appended_lock.start = new_lock_end + 1;
+ appended_lock.length = old_lock_end - appended_lock.start + 1;
+ held_locks.insert(pair<uint64_t, ceph_filelock>
+ (appended_lock.start, appended_lock));
+ ++client_held_lock_counts[(client_t)old_lock->client];
+ }
+ if (old_lock_start < new_lock_start) {
+ old_lock->length = new_lock_start - old_lock_start;
+ } else { //old_lock starts inside new_lock, so remove it
+ //if it extended past new_lock_end it's been replaced
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ }
+ }
+ if (!client_held_lock_counts[old_lock_client]) {
+ client_held_lock_counts.erase(old_lock_client);
+ }
+ }
+
+ //make sure to coalesce neighboring locks
+ for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = neighbor_locks.begin();
+ iter != neighbor_locks.end();
+ ++iter) {
+ old_lock = &(*iter)->second;
+ old_lock_client = old_lock->client;
+ dout(15) << "lock to coalesce: " << *old_lock << dendl;
+ /* because if it's a neighboring lock there can't be any self-overlapping
+ locks that covered it */
+ if (old_lock->type == new_lock.type) { //merge them
+ if (0 == new_lock.length) {
+ if (old_lock->start + old_lock->length == new_lock.start) {
+ new_lock.start = old_lock->start;
+ } else assert(0); /* if there's no end to new_lock, the neighbor
+ HAS TO be to left side */
+ } else if (0 == old_lock->length) {
+ if (new_lock.start + new_lock.length == old_lock->start) {
+ new_lock.length = 0;
+ } else assert(0); //same as before, but reversed
+ } else {
+ if (old_lock->start + old_lock->length == new_lock.start) {
+ new_lock.start = old_lock->start;
+ new_lock.length = old_lock->length + new_lock.length;
+ } else if (new_lock.start + new_lock.length == old_lock->start) {
+ new_lock.length = old_lock->length + new_lock.length;
+ }
+ }
+ held_locks.erase(*iter);
+ --client_held_lock_counts[old_lock_client];
+ }
+ if (!client_held_lock_counts[old_lock_client]) {
+ client_held_lock_counts.erase(old_lock_client);
+ }
+ }
+}
+
+void ceph_lock_state_t::remove_all_from(client_t client,
+ multimap<uint64_t,
+ ceph_filelock>& locks)
+{
+ multimap<uint64_t, ceph_filelock>::iterator iter = locks.begin();
+ while (iter != locks.end()) {
+ if ((client_t)iter->second.client == client) {
+ locks.erase(iter++);
+ } else ++iter;
+ }
+}
+
+multimap<uint64_t, ceph_filelock>::iterator
+ceph_lock_state_t::get_lower_bound(uint64_t start,
+ multimap<uint64_t, ceph_filelock>& lock_map)
+{
+ multimap<uint64_t, ceph_filelock>::iterator lower_bound =
+ lock_map.lower_bound(start);
+ if ((lower_bound->first != start)
+ && (start != 0)
+ && (lower_bound != lock_map.begin())) --lower_bound;
+ if (lock_map.end() == lower_bound)
+ dout(15) << "get_lower_dout(15)eturning end()" << dendl;
+ else dout(15) << "get_lower_bound returning iterator pointing to "
+ << lower_bound->second << dendl;
+ return lower_bound;
+ }
+
+multimap<uint64_t, ceph_filelock>::iterator
+ceph_lock_state_t::get_last_before(uint64_t end,
+ multimap<uint64_t, ceph_filelock>& lock_map)
+{
+ multimap<uint64_t, ceph_filelock>::iterator last =
+ lock_map.upper_bound(end);
+ if (last != lock_map.begin()) --last;
+ if (lock_map.end() == last)
+ dout(15) << "get_last_before returning end()" << dendl;
+ else dout(15) << "get_last_before returning iterator pointing to "
+ << last->second << dendl;
+ return last;
+}
+
+bool ceph_lock_state_t::share_space(
+ multimap<uint64_t, ceph_filelock>::iterator& iter,
+ uint64_t start, uint64_t end)
+{
+ bool ret = ((iter->first >= start && iter->first <= end) ||
+ ((iter->first < start) &&
+ (((iter->first + iter->second.length - 1) >= start) ||
+ (0 == iter->second.length))));
+ dout(15) << "share_space got start: " << start << ", end: " << end
+ << ", lock: " << iter->second << ", returning " << ret << dendl;
+ return ret;
+}
+
+bool ceph_lock_state_t::get_overlapping_locks(ceph_filelock& lock,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> & overlaps,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> *self_neighbors)
+{
+ dout(15) << "get_overlapping_locks" << dendl;
+ // create a lock starting one earlier and ending one later
+ // to check for neighbors
+ ceph_filelock neighbor_check_lock = lock;
+ if (neighbor_check_lock.start != 0) {
+ neighbor_check_lock.start = neighbor_check_lock.start - 1;
+ if (neighbor_check_lock.length)
+ neighbor_check_lock.length = neighbor_check_lock.length + 2;
+ } else {
+ if (neighbor_check_lock.length)
+ neighbor_check_lock.length = neighbor_check_lock.length + 1;
+ }
+ //find the last held lock starting at the point after lock
+ uint64_t endpoint = lock.start;
+ if (lock.length) {
+ endpoint += lock.length;
+ } else {
+ endpoint = uint64_t(-1); // max offset
+ }
+ multimap<uint64_t, ceph_filelock>::iterator iter =
+ get_last_before(endpoint, held_locks);
+ bool cont = iter != held_locks.end();
+ while(cont) {
+ if (share_space(iter, lock)) {
+ overlaps.push_front(iter);
+ } else if (self_neighbors &&
+ (neighbor_check_lock.client == iter->second.client) &&
+ (neighbor_check_lock.pid == iter->second.pid) &&
+ share_space(iter, neighbor_check_lock)) {
+ self_neighbors->push_front(iter);
+ }
+ if ((iter->first < lock.start) && (CEPH_LOCK_EXCL == iter->second.type)) {
+ //can't be any more overlapping locks or they'd interfere with this one
+ cont = false;
+ } else if (held_locks.begin() == iter) cont = false;
+ else --iter;
+ }
+ return !overlaps.empty();
+}
+
+bool ceph_lock_state_t::get_waiting_overlaps(ceph_filelock& lock,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator>&
+ overlaps)
+{
+ dout(15) << "get_waiting_overlaps" << dendl;
+ multimap<uint64_t, ceph_filelock>::iterator iter =
+ get_last_before(lock.start + lock.length - 1, waiting_locks);
+ bool cont = iter != waiting_locks.end();
+ while(cont) {
+ if (share_space(iter, lock)) overlaps.push_front(iter);
+ if (waiting_locks.begin() == iter) cont = false;
+ else --iter;
+ }
+ return !overlaps.empty();
+}
+
+void ceph_lock_state_t::split_by_owner(ceph_filelock& owner,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator>& locks,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator>&
+ owned_locks)
+{
+ list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = locks.begin();
+ dout(15) << "owner lock: " << owner << dendl;
+ while (iter != locks.end()) {
+ dout(15) << "comparing to " << (*iter)->second << dendl;
+ if ((*iter)->second.client == owner.client &&
+ (*iter)->second.pid_namespace == owner.pid_namespace &&
+ (*iter)->second.pid == owner.pid) {
+ dout(15) << "success, pushing to owned_locks" << dendl;
+ owned_locks.push_back(*iter);
+ iter = locks.erase(iter);
+ } else {
+ dout(15) << "failure, something not equal in this group "
+ << (*iter)->second.client << ":" << owner.client << ","
+ << (*iter)->second.pid_namespace << ":" << owner.pid_namespace
+ << "," << (*iter)->second.pid << ":" << owner.pid << dendl;
+ ++iter;
+ }
+ }
+}
+
+ceph_filelock *
+ceph_lock_state_t::contains_exclusive_lock(list<multimap<uint64_t,
+ ceph_filelock>::iterator>& locks)
+{
+ for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+ iter = locks.begin();
+ iter != locks.end();
+ ++iter) {
+ if (CEPH_LOCK_EXCL == (*iter)->second.type) return &(*iter)->second;
+ }
+ return NULL;
+}
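
For orientation before the header diff below: the new flock.cc keeps held and waiting byte-range locks in multimaps keyed by starting offset, a length of 0 means "to end of file", and share_space() is the overlap predicate that get_overlapping_locks(), add_lock(), and remove_lock() all build on. The following standalone sketch restates just that predicate with simplified stand-in types; range_lock and overlaps() are hypothetical names for illustration, not the actual ceph_filelock / ceph_lock_state_t API.

#include <cstdint>
#include <iostream>
#include <iterator>
#include <map>

// Illustrative stand-in for ceph_filelock: a start offset and a length,
// where length == 0 means the lock extends to end of file.
struct range_lock {
  uint64_t start;
  uint64_t length;
};

// Same overlap test as ceph_lock_state_t::share_space(): does the lock at
// `it` share any byte with the closed interval [start, end]?
bool overlaps(std::multimap<uint64_t, range_lock>::iterator it,
              uint64_t start, uint64_t end)
{
  return (it->first >= start && it->first <= end) ||
         (it->first < start &&
          ((it->first + it->second.length - 1 >= start) ||
           it->second.length == 0));
}

int main()
{
  std::multimap<uint64_t, range_lock> held;  // keyed by starting offset
  range_lock a{0, 100};    // bytes 0..99
  range_lock b{200, 0};    // bytes 200..end of file
  held.insert({a.start, a});
  held.insert({b.start, b});

  std::multimap<uint64_t, range_lock>::iterator first = held.begin();
  std::multimap<uint64_t, range_lock>::iterator second = std::next(first);

  std::cout << overlaps(first, 50, 149) << "\n";    // 1: 50..149 hits 0..99
  std::cout << overlaps(second, 50, 149) << "\n";   // 0: starts after 149
  std::cout << overlaps(second, 300, 309) << "\n";  // 1: to-EOF lock covers it
  return 0;
}

Keying the multimap by start offset is what lets get_last_before() and get_lower_bound() walk backwards from the end of the queried range and stop early once an exclusive lock beginning before the range is found.
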
diff --git a/src/mds/flock.h b/src/mds/flock.h
index 69228b46c90..133feabee66 100644
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ -26,239 +26,63 @@ inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
l.type == r.type;
}
-struct ceph_lock_state_t {
+class ceph_lock_state_t {
+public:
multimap<uint64_t, ceph_filelock> held_locks; // current locks
multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
// both of the above are keyed by starting offset
map<client_t, int> client_held_lock_counts;
map<client_t, int> client_waiting_lock_counts;
- bool is_waiting(ceph_filelock &fl) {
- multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
- while (p != waiting_locks.end()) {
- if (p->second.start > fl.start)
- return false;
- if (p->second.length == fl.length &&
- p->second.client == fl.client &&
- p->second.pid == fl.pid &&
- p->second.pid_namespace == fl.pid_namespace)
- return true;
- ++p;
- }
- return false;
- }
- void remove_waiting(ceph_filelock& fl) {
- multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
- while (p != waiting_locks.end()) {
- if (p->second.start > fl.start)
- return;
- if (p->second.length == fl.length &&
- p->second.client == fl.client &&
- p->second.pid == fl.pid &&
- p->second.pid_namespace == fl.pid_namespace) {
- waiting_locks.erase(p);
- return;
- }
- ++p;
- }
- }
+ /**
+ * Check if a lock is on the waiting_locks list.
+ *
+ * @param fl The filelock to check for
+ * @returns True if the lock is waiting, false otherwise
+ */
+ bool is_waiting(ceph_filelock &fl);
+ /**
+ * Remove a lock from the waiting_locks list
+ *
+ * @param fl The filelock to remove
+ */
+ void remove_waiting(ceph_filelock& fl);
/*
* Try to set a new lock. If it's blocked and wait_on_fail is true,
* add the lock to waiting_locks.
* The lock needs to be of type CEPH_LOCK_EXCL or CEPH_LOCK_SHARED.
+ * This may merge previous locks, or convert the type of already-owned
+ * locks.
*
- * If we already added ourselves to waiting_locks, did_wait will be
- * true. If did_wait==true and we're not on the list, that means we
- * were canceled and we should return an error.
+ * @param new_lock The lock to set
+ * @param wait_on_fail whether to wait until the lock can be set.
+ * Otherwise it fails immediately when blocked.
*
- * Returns true if set, false if not set.
+ * @returns true if set, false if not set.
*/
- bool add_lock(ceph_filelock& new_lock, bool wait_on_fail) {
- dout(15) << "add_lock " << new_lock << dendl;
- bool ret = false;
- list<multimap<uint64_t, ceph_filelock>::iterator>
- overlapping_locks, self_overlapping_locks, neighbor_locks;
-
- // first, get any overlapping locks and split them into owned-by-us and not
- if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
- dout(15) << "got overlapping lock, splitting by owner" << dendl;
- split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
- }
- if (!overlapping_locks.empty()) { //overlapping locks owned by others :(
- if (CEPH_LOCK_EXCL == new_lock.type) {
- //can't set, we want an exclusive
- dout(15) << "overlapping lock, and this lock is exclusive, can't set"
- << dendl;
- if (wait_on_fail) {
- waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
- }
- } else { //shared lock, check for any exclusive locks blocking us
- if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
- dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
- if (wait_on_fail) {
- waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
- }
- } else {
- //yay, we can insert a shared lock
- dout(15) << "inserting shared lock" << dendl;
- adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
- held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
- ret = true;
- }
- }
- } else { //no overlapping locks except our own
- adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
- dout(15) << "no conflicts, inserting " << new_lock << dendl;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (new_lock.start, new_lock));
- ret = true;
- }
- if (ret)
- ++client_held_lock_counts[(client_t)new_lock.client];
- else if (wait_on_fail)
- ++client_waiting_lock_counts[(client_t)new_lock.client];
- return ret;
- }
-
- void look_for_lock(ceph_filelock& testing_lock) {
- list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
- self_overlapping_locks;
- if (get_overlapping_locks(testing_lock, overlapping_locks)) {
- split_by_owner(testing_lock, overlapping_locks, self_overlapping_locks);
- }
- if (!overlapping_locks.empty()) { //somebody else owns overlapping lock
- if (CEPH_LOCK_EXCL == testing_lock.type) { //any lock blocks it
- testing_lock = (*overlapping_locks.begin())->second;
- } else {
- ceph_filelock *blocking_lock;
- if ((blocking_lock = contains_exclusive_lock(overlapping_locks))) {
- testing_lock = *blocking_lock;
- } else { //nothing blocking!
- testing_lock.type = CEPH_LOCK_UNLOCK;
- }
- }
- return;
- }
- //if we get here, only our own locks block
- testing_lock.type = CEPH_LOCK_UNLOCK;
- }
+ bool add_lock(ceph_filelock& new_lock, bool wait_on_fail);
+ /**
+ * See if a lock is blocked by existing locks. If the lock is blocked,
+ * it will be set to the value of the first blocking lock. Otherwise,
+ * it will be returned unchanged, except for setting the type field
+ * to CEPH_LOCK_UNLOCK.
+ *
+ * @param testing_lock The lock to check for conflicts on.
+ */
+ void look_for_lock(ceph_filelock& testing_lock);
/*
* Remove lock(s) described in old_lock. This may involve splitting a
* previous lock or making a previous lock smaller.
+ *
+ * @param removal_lock The lock to remove
+ * @param activated_locks A return parameter, holding activated wait locks.
*/
void remove_lock(ceph_filelock removal_lock,
- list<ceph_filelock>& activated_locks) {
- list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
- self_overlapping_locks, crossed_waiting_locks;
- if (get_overlapping_locks(removal_lock, overlapping_locks)) {
- dout(15) << "splitting by owner" << dendl;
- split_by_owner(removal_lock, overlapping_locks, self_overlapping_locks);
- } else dout(15) << "attempt to remove lock at " << removal_lock.start
- << " but no locks there!" << dendl;
- bool remove_to_end = (0 == removal_lock.length);
- bool old_lock_to_end;
- uint64_t removal_start = removal_lock.start;
- uint64_t removal_end = removal_start + removal_lock.length - 1;
- uint64_t old_lock_end;
- __s64 old_lock_client = 0;
- ceph_filelock *old_lock;
-
- dout(15) << "examining " << self_overlapping_locks.size()
- << " self-overlapping locks for removal" << dendl;
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = self_overlapping_locks.begin();
- iter != self_overlapping_locks.end();
- ++iter) {
- dout(15) << "self overlapping lock " << (*iter)->second << dendl;
- old_lock = &(*iter)->second;
- old_lock_to_end = (0 == old_lock->length);
- old_lock_end = old_lock->start + old_lock->length - 1;
- old_lock_client = old_lock->client;
- if (remove_to_end) {
- if (old_lock->start < removal_start) {
- old_lock->length = removal_start - old_lock->start;
- } else {
- dout(15) << "erasing " << (*iter)->second << dendl;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- } else if (old_lock_to_end) {
- ceph_filelock append_lock = *old_lock;
- append_lock.start = removal_end+1;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (append_lock.start, append_lock));
- ++client_held_lock_counts[(client_t)old_lock->client];
- if (old_lock->start >= removal_start) {
- dout(15) << "erasing " << (*iter)->second << dendl;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- } else old_lock->length = removal_start - old_lock->start;
- } else {
- if (old_lock_end > removal_end) {
- ceph_filelock append_lock = *old_lock;
- append_lock.start = removal_end + 1;
- append_lock.length = old_lock_end - append_lock.start + 1;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (append_lock.start, append_lock));
- ++client_held_lock_counts[(client_t)old_lock->client];
- }
- if (old_lock->start < removal_start) {
- old_lock->length = removal_start - old_lock->start;
- } else {
- dout(15) << "erasing " << (*iter)->second << dendl;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- }
- if (!client_held_lock_counts[old_lock_client]) {
- client_held_lock_counts.erase(old_lock_client);
- }
- }
-
- /* okay, we've removed the locks, but removing them might allow some
- * other waiting locks to come through */
- if (get_waiting_overlaps(removal_lock, crossed_waiting_locks)) {
- /*let's do this the SUPER lazy way for now. Should work out something
- that's slightly less slow and wasteful, though.
- 1) Remove lock from waiting_locks.
- 2) attempt to insert lock via add_lock
- 3) Add to success list if we get back "true"
-
- In the future, should probably set this up to detect some
- guaranteed blocks and do fewer map lookups.
- */
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = crossed_waiting_locks.begin();
- iter != crossed_waiting_locks.end();
- ++iter) {
- ceph_filelock cur_lock = (*iter)->second;
- waiting_locks.erase(*iter);
- --client_waiting_lock_counts[(client_t)cur_lock.client];
- if (!client_waiting_lock_counts[(client_t)cur_lock.client]) {
- client_waiting_lock_counts.erase((client_t)cur_lock.client);
- }
- if (add_lock(cur_lock, true))
- activated_locks.push_back(cur_lock);
- }
- }
- }
-
- bool remove_all_from (client_t client) {
- bool cleared_any = false;
- if (client_held_lock_counts.count(client)) {
- remove_all_from(client, held_locks);
- client_held_lock_counts.erase(client);
- cleared_any = true;
- }
- if (client_waiting_lock_counts.count(client)) {
- remove_all_from(client, waiting_locks);
- client_waiting_lock_counts.erase(client);
- }
- return cleared_any;
- }
+ list<ceph_filelock>& activated_locks);
+ bool remove_all_from(client_t client);
private:
/**
* Adjust old locks owned by a single process so that process can set
@@ -270,182 +94,32 @@ private:
* This function should only be called once you know the lock will be
* inserted, as it DOES adjust new_lock. You can call this function
* on an empty list, in which case it does nothing.
- * This function does not remove elements from the list, so regard the list
+ * This function does not remove elements from old_locks, so regard the list
* as bad information following function invocation.
*
- * new_lock: The new lock the process has requested.
- * old_locks: list of all locks currently held by same
+ * @param new_lock The new lock the process has requested.
+ * @param old_locks list of all locks currently held by same
* client/process that overlap new_lock.
- * neighbor_locks: locks owned by same process that neighbor new_lock on
+ * @param neighbor_locks locks owned by same process that neighbor new_lock on
* left or right side.
*/
void adjust_locks(list<multimap<uint64_t, ceph_filelock>::iterator> old_locks,
- ceph_filelock& new_lock,
- list<multimap<uint64_t, ceph_filelock>::iterator>
- neighbor_locks) {
- dout(15) << "adjust_locks" << dendl;
- bool new_lock_to_end = (0 == new_lock.length);
- bool old_lock_to_end;
- uint64_t new_lock_start = new_lock.start;
- uint64_t new_lock_end = new_lock.start + new_lock.length - 1;
- uint64_t old_lock_start, old_lock_end;
- __s64 old_lock_client = 0;
- ceph_filelock *old_lock;
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = old_locks.begin();
- iter != old_locks.end();
- ++iter) {
- old_lock = &(*iter)->second;
- dout(15) << "adjusting lock: " << *old_lock << dendl;
- old_lock_to_end = (0 == old_lock->length);
- old_lock_start = old_lock->start;
- old_lock_end = old_lock->start + old_lock->length - 1;
- new_lock_start = new_lock.start;
- new_lock_end = new_lock.start + new_lock.length - 1;
- old_lock_client = old_lock->client;
- if (new_lock_to_end || old_lock_to_end) {
- //special code path to deal with a length set at 0
- dout(15) << "one lock extends forever" << dendl;
- if (old_lock->type == new_lock.type) {
- //just unify them in new lock, remove old lock
- dout(15) << "same lock type, unifying" << dendl;
- new_lock.start = (new_lock_start < old_lock_start) ? new_lock_start :
- old_lock_start;
- new_lock.length = 0;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- } else { //not same type, have to keep any remains of old lock around
- dout(15) << "shrinking old lock" << dendl;
- if (new_lock_to_end) {
- if (old_lock_start < new_lock_start) {
- old_lock->length = new_lock_start - old_lock_start;
- } else {
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- } else { //old lock extends past end of new lock
- ceph_filelock appended_lock = *old_lock;
- appended_lock.start = new_lock_end + 1;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (appended_lock.start, appended_lock));
- ++client_held_lock_counts[(client_t)old_lock->client];
- if (old_lock_start < new_lock_start) {
- old_lock->length = new_lock_start - old_lock_start;
- } else {
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- }
- }
- } else {
- if (old_lock->type == new_lock.type) { //just merge them!
- dout(15) << "merging locks, they're the same type" << dendl;
- new_lock.start = (old_lock_start < new_lock_start ) ? old_lock_start :
- new_lock_start;
- int new_end = (new_lock_end > old_lock_end) ? new_lock_end :
- old_lock_end;
- new_lock.length = new_end - new_lock.start + 1;
- dout(15) << "erasing lock " << (*iter)->second << dendl;
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- } else { //we'll have to update sizes and maybe make new locks
- dout(15) << "locks aren't same type, changing sizes" << dendl;
- if (old_lock_end > new_lock_end) { //add extra lock after new_lock
- ceph_filelock appended_lock = *old_lock;
- appended_lock.start = new_lock_end + 1;
- appended_lock.length = old_lock_end - appended_lock.start + 1;
- held_locks.insert(pair<uint64_t, ceph_filelock>
- (appended_lock.start, appended_lock));
- ++client_held_lock_counts[(client_t)old_lock->client];
- }
- if (old_lock_start < new_lock_start) {
- old_lock->length = new_lock_start - old_lock_start;
- } else { //old_lock starts inside new_lock, so remove it
- //if it extended past new_lock_end it's been replaced
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- }
- }
- if (!client_held_lock_counts[old_lock_client]) {
- client_held_lock_counts.erase(old_lock_client);
- }
- }
-
- //make sure to coalesce neighboring locks
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = neighbor_locks.begin();
- iter != neighbor_locks.end();
- ++iter) {
- old_lock = &(*iter)->second;
- old_lock_client = old_lock->client;
- dout(15) << "lock to coalesce: " << *old_lock << dendl;
- /* because if it's a neibhoring lock there can't be any self-overlapping
- locks that covered it */
- if (old_lock->type == new_lock.type) { //merge them
- if (0 == new_lock.length) {
- if (old_lock->start + old_lock->length == new_lock.start) {
- new_lock.start = old_lock->start;
- } else assert(0); /* if there's no end to new_lock, the neighbor
- HAS TO be to left side */
- } else if (0 == old_lock->length) {
- if (new_lock.start + new_lock.length == old_lock->start) {
- new_lock.length = 0;
- } else assert(0); //same as before, but reversed
- } else {
- if (old_lock->start + old_lock->length == new_lock.start) {
- new_lock.start = old_lock->start;
- new_lock.length = old_lock->length + new_lock.length;
- } else if (new_lock.start + new_lock.length == old_lock->start) {
- new_lock.length = old_lock->length + new_lock.length;
- }
- }
- held_locks.erase(*iter);
- --client_held_lock_counts[old_lock_client];
- }
- if (!client_held_lock_counts[old_lock_client]) {
- client_held_lock_counts.erase(old_lock_client);
- }
- }
- }
+ ceph_filelock& new_lock,
+ list<multimap<uint64_t, ceph_filelock>::iterator>
+ neighbor_locks);
//this won't reset the counter map value, do that yourself
- void remove_all_from(client_t client, multimap<uint64_t, ceph_filelock>& locks) {
- multimap<uint64_t, ceph_filelock>::iterator iter = locks.begin();
- while (iter != locks.end()) {
- if ((client_t)iter->second.client == client) {
- locks.erase(iter++);
- } else ++iter;
- }
- }
+ void remove_all_from(client_t client,
+ multimap<uint64_t, ceph_filelock>& locks);
//get last lock prior to start position
multimap<uint64_t, ceph_filelock>::iterator
- get_lower_bound(uint64_t start, multimap<uint64_t, ceph_filelock>& lock_map) {
- multimap<uint64_t, ceph_filelock>::iterator lower_bound =
- lock_map.lower_bound(start);
- if ((lower_bound->first != start)
- && (start != 0)
- && (lower_bound != lock_map.begin())) --lower_bound;
- if (lock_map.end() == lower_bound)
- dout(15) << "get_lower_dout(15)eturning end()" << dendl;
- else dout(15) << "get_lower_bound returning iterator pointing to "
- << lower_bound->second << dendl;
- return lower_bound;
- }
-
+ get_lower_bound(uint64_t start,
+ multimap<uint64_t, ceph_filelock>& lock_map);
//get latest-starting lock that goes over the byte "end"
multimap<uint64_t, ceph_filelock>::iterator
- get_last_before(uint64_t end, multimap<uint64_t, ceph_filelock>& lock_map) {
- multimap<uint64_t, ceph_filelock>::iterator last =
- lock_map.upper_bound(end);
- if (last != lock_map.begin()) --last;
- if (lock_map.end() == last)
- dout(15) << "get_last_before returning end()" << dendl;
- else dout(15) << "get_last_before returning iterator pointing to "
- << last->second << dendl;
- return last;
- }
+ get_last_before(uint64_t end,
+ multimap<uint64_t, ceph_filelock>& lock_map);
/*
* See if an iterator's lock covers any of the same bounds as a given range
@@ -454,25 +128,18 @@ private:
* If the length is 0, the lock covers from "start" to the end of the file.
*/
bool share_space(multimap<uint64_t, ceph_filelock>::iterator& iter,
- uint64_t start, uint64_t end) {
- bool ret = ((iter->first >= start && iter->first <= end) ||
- ((iter->first < start) &&
- (((iter->first + iter->second.length - 1) >= start) ||
- (0 == iter->second.length))));
- dout(15) << "share_space got start: " << start << ", end: " << end
- << ", lock: " << iter->second << ", returning " << ret << dendl;
- return ret;
- }
+ uint64_t start, uint64_t end);
+
bool share_space(multimap<uint64_t, ceph_filelock>::iterator& iter,
- ceph_filelock& lock) {
+ ceph_filelock &lock) {
uint64_t end = lock.start;
- if (lock.length)
+ if (lock.length) {
end += lock.length - 1;
- else // zero length means to end-of-file
+ } else { // zero length means end of file
end = uint64_t(-1);
+ }
return share_space(iter, lock.start, end);
}
-
/*
*get a list of all locks overlapping with the given lock's range
* lock: the lock to compare with.
@@ -480,47 +147,11 @@ private:
* Returns: true if at least one lock overlaps.
*/
bool get_overlapping_locks(ceph_filelock& lock,
- list<multimap<uint64_t, ceph_filelock>::iterator> & overlaps,
- list<multimap<uint64_t, ceph_filelock>::iterator> *self_neighbors) {
- dout(15) << "get_overlapping_locks" << dendl;
- // create a lock starting one earlier and ending one later
- // to check for neighbors
- ceph_filelock neighbor_check_lock = lock;
- if (neighbor_check_lock.start != 0) {
- neighbor_check_lock.start = neighbor_check_lock.start - 1;
- if (neighbor_check_lock.length)
- neighbor_check_lock.length = neighbor_check_lock.length + 2;
- } else {
- if (neighbor_check_lock.length)
- neighbor_check_lock.length = neighbor_check_lock.length + 1;
- }
- //find the last held lock starting at the point after lock
- uint64_t endpoint = lock.start;
- if (lock.length) {
- endpoint += lock.length;
- } else {
- endpoint = uint64_t(-1); // max offset
- }
- multimap<uint64_t, ceph_filelock>::iterator iter =
- get_last_before(endpoint, held_locks);
- bool cont = iter != held_locks.end();
- while(cont) {
- if (share_space(iter, lock)) {
- overlaps.push_front(iter);
- } else if (self_neighbors &&
- (neighbor_check_lock.client == iter->second.client) &&
- (neighbor_check_lock.pid == iter->second.pid) &&
- share_space(iter, neighbor_check_lock)) {
- self_neighbors->push_front(iter);
- }
- if ((iter->first < lock.start) && (CEPH_LOCK_EXCL == iter->second.type)) {
- //can't be any more overlapping locks or they'd interfere with this one
- cont = false;
- } else if (held_locks.begin() == iter) cont = false;
- else --iter;
- }
- return !overlaps.empty();
- }
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> & overlaps,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> *self_neighbors);
+
bool get_overlapping_locks(ceph_filelock& lock,
list<multimap<uint64_t, ceph_filelock>::iterator>& overlaps) {
@@ -534,20 +165,8 @@ private:
* Returns: true if at least one waiting_lock overlaps
*/
bool get_waiting_overlaps(ceph_filelock& lock,
- list<multimap<uint64_t, ceph_filelock>::iterator>&
- overlaps) {
- dout(15) << "get_waiting_overlaps" << dendl;
- multimap<uint64_t, ceph_filelock>::iterator iter =
- get_last_before(lock.start + lock.length - 1, waiting_locks);
- bool cont = iter != waiting_locks.end();
- while(cont) {
- if (share_space(iter, lock)) overlaps.push_front(iter);
- if (waiting_locks.begin() == iter) cont = false;
- --iter;
- }
- return !overlaps.empty();
- }
-
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator>& overlaps);
/*
* split a list of locks up by whether they're owned by same
* process as given lock
@@ -557,38 +176,13 @@ private:
* owned_locks: an empty list, to be filled with the locks owned by owner
*/
void split_by_owner(ceph_filelock& owner,
- list<multimap<uint64_t, ceph_filelock>::iterator> & locks,
- list<multimap<uint64_t, ceph_filelock>::iterator> & owned_locks) {
- list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = locks.begin();
- dout(15) << "owner lock: " << owner << dendl;
- while (iter != locks.end()) {
- dout(15) << "comparing to " << (*iter)->second << dendl;
- if ((*iter)->second.client == owner.client &&
- (*iter)->second.pid_namespace == owner.pid_namespace &&
- (*iter)->second.pid == owner.pid) {
- dout(15) << "success, pushing to owned_locks" << dendl;
- owned_locks.push_back(*iter);
- iter = locks.erase(iter);
- } else {
- dout(15) << "failure, something not equal in this group "
- << (*iter)->second.client << ":" << owner.client << ","
- << (*iter)->second.pid_namespace << ":" << owner.pid_namespace
- << "," << (*iter)->second.pid << ":" << owner.pid << dendl;
- ++iter;
- }
- }
- }
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> & locks,
+ list<multimap<uint64_t,
+ ceph_filelock>::iterator> & owned_locks);
- ceph_filelock *contains_exclusive_lock(list<multimap<uint64_t, ceph_filelock>::iterator>& locks) {
- for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
- iter = locks.begin();
- iter != locks.end();
- ++iter) {
- if (CEPH_LOCK_EXCL == (*iter)->second.type) return &(*iter)->second;
- }
- return NULL;
- }
+ ceph_filelock *contains_exclusive_lock(list<multimap<uint64_t,
+ ceph_filelock>::iterator>& locks);
public:
void encode(bufferlist& bl) const {