summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordormando <dormando@rydia.net>2012-08-17 19:51:46 -0700
committerdormando <dormando@rydia.net>2012-09-03 00:35:54 -0700
commit2db1bf462c67c66323850272acd0f2b60d6e62ec (patch)
tree67d3e05a99ba5219c5c74364e74d7db4c8ead531
parentad27588391e4f5f85f07f9d734334ade4cc5dbb0 (diff)
downloadmemcached-2db1bf462c67c66323850272acd0f2b60d6e62ec.tar.gz
alloc loop now attempts an item_lock
Fixes a few issues with a restructuring... I think -M was broken before; it should be fixed now. It had a refcount leak. Now walks up to five items from the bottom in case the bottommost items are item_locked or refcount locked. Helps avoid excessive OOM errors in some oddball cases. Those happen more often if you're hammering on a handful of pages in a very large class size (100k+). The hash item lock ensures that while we're holding that lock, no other thread can be incrementing the item's refcount at the same time. It will mean more in future patches. The slab rebalancer gets a similar update.
-rw-r--r--items.c119
-rw-r--r--items.h2
-rw-r--r--memcached.c4
-rw-r--r--memcached.h1
-rw-r--r--slabs.c63
-rw-r--r--thread.c6
6 files changed, 111 insertions, 84 deletions
diff --git a/items.c b/items.c
index 84011c4..3b26265 100644
--- a/items.c
+++ b/items.c
@@ -85,7 +85,9 @@ static size_t item_make_header(const uint8_t nkey, const int flags, const int nb
}
/*@null@*/
-item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
+item *do_item_alloc(char *key, const size_t nkey, const int flags,
+ const rel_time_t exptime, const int nbytes,
+ const uint32_t cur_hv) {
uint8_t nsuffix;
item *it = NULL;
char suffix[40];
@@ -100,13 +102,39 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
mutex_lock(&cache_lock);
/* do a quick check if we have any expired items in the tail.. */
+ int tries = 5;
+ int tried_alloc = 0;
item *search;
rel_time_t oldest_live = settings.oldest_live;
search = tails[id];
- if (search != NULL && (refcount_incr(&search->refcount) == 2)) {
+ /* We walk up *only* for locked items. Never searching for expired.
+ * Waste of CPU for almost all deployments */
+ for (; tries > 0 && search != NULL; tries--, search=search->prev) {
+ uint32_t hv = hash(ITEM_key(search), search->nkey, 0);
+ /* Attempt to hash item lock the "search" item. If locked, no
+ * other callers can incr the refcount
+ */
+ if (hv != cur_hv && item_trylock(hv) != 0)
+ continue;
+ /* Now see if the item is refcount locked */
+ if (refcount_incr(&search->refcount) != 2) {
+ refcount_decr(&search->refcount);
+ /* Old rare bug could cause a refcount leak. We haven't seen
+ * it in years, but we leave this code in to prevent failures
+ * just in case */
+ if (search->time + TAIL_REPAIR_TIME < current_time) {
+ itemstats[id].tailrepairs++;
+ search->refcount = 1;
+ do_item_unlink_nolock(search, hv);
+ }
+ item_unlock(hv);
+ continue;
+ }
+
+ /* Expired or flushed */
if ((search->exptime != 0 && search->exptime < current_time)
- || (search->time <= oldest_live && oldest_live <= current_time)) { // dead by flush
+ || (search->time <= oldest_live && oldest_live <= current_time)) {
STATS_LOCK();
stats.reclaimed++;
STATS_UNLOCK();
@@ -119,68 +147,55 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
}
it = search;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
- do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
+ do_item_unlink_nolock(it, hv);
/* Initialize the item block: */
it->slabs_clsid = 0;
} else if ((it = slabs_alloc(ntotal, id)) == NULL) {
+ tried_alloc = 1;
if (settings.evict_to_free == 0) {
itemstats[id].outofmemory++;
- mutex_unlock(&cache_lock);
- return NULL;
- }
- itemstats[id].evicted++;
- itemstats[id].evicted_time = current_time - search->time;
- if (search->exptime != 0)
- itemstats[id].evicted_nonzero++;
- if ((search->it_flags & ITEM_FETCHED) == 0) {
+ } else {
+ itemstats[id].evicted++;
+ itemstats[id].evicted_time = current_time - search->time;
+ if (search->exptime != 0)
+ itemstats[id].evicted_nonzero++;
+ if ((search->it_flags & ITEM_FETCHED) == 0) {
+ STATS_LOCK();
+ stats.evicted_unfetched++;
+ STATS_UNLOCK();
+ itemstats[id].evicted_unfetched++;
+ }
STATS_LOCK();
- stats.evicted_unfetched++;
+ stats.evictions++;
STATS_UNLOCK();
- itemstats[id].evicted_unfetched++;
+ it = search;
+ slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
+ do_item_unlink_nolock(it, hv);
+ /* Initialize the item block: */
+ it->slabs_clsid = 0;
+
+ /* If we've just evicted an item, and the automover is set to
+ * angry bird mode, attempt to rip memory into this slab class.
+ * TODO: Move valid object detection into a function, and on a
+ * "successful" memory pull, look behind and see if the next alloc
+ * would be an eviction. Then kick off the slab mover before the
+ * eviction happens.
+ */
+ if (settings.slab_automove == 2)
+ slabs_reassign(-1, id);
}
- STATS_LOCK();
- stats.evictions++;
- STATS_UNLOCK();
- it = search;
- slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
- do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
- /* Initialize the item block: */
- it->slabs_clsid = 0;
-
- /* If we've just evicted an item, and the automover is set to
- * angry bird mode, attempt to rip memory into this slab class.
- * TODO: Move valid object detection into a function, and on a
- * "successful" memory pull, look behind and see if the next alloc
- * would be an eviction. Then kick off the slab mover before the
- * eviction happens.
- */
- if (settings.slab_automove == 2)
- slabs_reassign(-1, id);
- } else {
- refcount_decr(&search->refcount);
}
- } else {
- /* If the LRU is empty or locked, attempt to allocate memory */
- it = slabs_alloc(ntotal, id);
- if (search != NULL)
- refcount_decr(&search->refcount);
+
+ refcount_decr(&search->refcount);
+ item_unlock(hv);
+ break;
}
+ if (!tried_alloc && (tries == 0 || search == NULL))
+ it = slabs_alloc(ntotal, id);
+
if (it == NULL) {
itemstats[id].outofmemory++;
- /* Last ditch effort. There was a very rare bug which caused
- * refcount leaks. We leave this just in case they ever happen again.
- * We can reasonably assume no item can stay locked for more than
- * three hours, so if we find one in the tail which is that old,
- * free it anyway.
- */
- if (search != NULL &&
- search->refcount != 2 &&
- search->time + TAIL_REPAIR_TIME < current_time) {
- itemstats[id].tailrepairs++;
- search->refcount = 1;
- do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
- }
mutex_unlock(&cache_lock);
return NULL;
}
diff --git a/items.h b/items.h
index 2ec142d..4b25dcc 100644
--- a/items.h
+++ b/items.h
@@ -2,7 +2,7 @@
uint64_t get_cas_id(void);
/*@null@*/
-item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes);
+item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv);
void item_free(item *it);
bool item_size_ok(const size_t nkey, const int flags, const int nbytes);
diff --git a/memcached.c b/memcached.c
index 2cb0b20..71d5ff0 100644
--- a/memcached.c
+++ b/memcached.c
@@ -2325,7 +2325,7 @@ enum store_item_type do_store_item(item *it, int comm, conn *c, const uint32_t h
flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10);
- new_it = item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */);
+ new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */, hv);
if (new_it == NULL) {
/* SERVER_ERROR out of memory */
@@ -3101,7 +3101,7 @@ enum delta_result_type do_add_delta(conn *c, const char *key, const size_t nkey,
res = strlen(buf);
if (res + 2 > it->nbytes || it->refcount != 1) { /* need to realloc */
item *new_it;
- new_it = item_alloc(ITEM_key(it), it->nkey, atoi(ITEM_suffix(it) + 1), it->exptime, res + 2 );
+ new_it = do_item_alloc(ITEM_key(it), it->nkey, atoi(ITEM_suffix(it) + 1), it->exptime, res + 2, hv);
if (new_it == 0) {
do_item_remove(it);
return EOM;
diff --git a/memcached.h b/memcached.h
index 7774e0e..0517d4a 100644
--- a/memcached.h
+++ b/memcached.h
@@ -532,6 +532,7 @@ void item_unlink(item *it);
void item_update(item *it);
void item_lock(uint32_t hv);
+int item_trylock(uint32_t hv);
void item_unlock(uint32_t hv);
unsigned short refcount_incr(unsigned short *refcount);
unsigned short refcount_decr(unsigned short *refcount);
diff --git a/slabs.c b/slabs.c
index b74617e..71202fd 100644
--- a/slabs.c
+++ b/slabs.c
@@ -498,7 +498,7 @@ static int slab_rebalance_start(void) {
}
enum move_status {
- MOVE_PASS=0, MOVE_DONE, MOVE_BUSY
+ MOVE_PASS=0, MOVE_DONE, MOVE_BUSY, MOVE_LOCKED
};
/* refcount == 0 is safe since nobody can incr while cache_lock is held.
@@ -522,36 +522,42 @@ static int slab_rebalance_move(void) {
item *it = slab_rebal.slab_pos;
status = MOVE_PASS;
if (it->slabs_clsid != 255) {
- refcount = refcount_incr(&it->refcount);
- if (refcount == 1) { /* item is unlinked, unused */
- if (it->it_flags & ITEM_SLABBED) {
- /* remove from slab freelist */
- if (s_cls->slots == it) {
- s_cls->slots = it->next;
+ uint32_t hv = hash(ITEM_key(it), it->nkey, 0);
+ if (item_trylock(hv) != 0) {
+ status = MOVE_LOCKED;
+ } else {
+ refcount = refcount_incr(&it->refcount);
+ if (refcount == 1) { /* item is unlinked, unused */
+ if (it->it_flags & ITEM_SLABBED) {
+ /* remove from slab freelist */
+ if (s_cls->slots == it) {
+ s_cls->slots = it->next;
+ }
+ if (it->next) it->next->prev = it->prev;
+ if (it->prev) it->prev->next = it->next;
+ s_cls->sl_curr--;
+ status = MOVE_DONE;
+ } else {
+ status = MOVE_BUSY;
+ }
+ } else if (refcount == 2) { /* item is linked but not busy */
+ if ((it->it_flags & ITEM_LINKED) != 0) {
+ do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
+ status = MOVE_DONE;
+ } else {
+ /* refcount == 1 + !ITEM_LINKED means the item is being
+ * uploaded to, or was just unlinked but hasn't been freed
+ * yet. Let it bleed off on its own and try again later */
+ status = MOVE_BUSY;
}
- if (it->next) it->next->prev = it->prev;
- if (it->prev) it->prev->next = it->next;
- s_cls->sl_curr--;
- status = MOVE_DONE;
- } else {
- status = MOVE_BUSY;
- }
- } else if (refcount == 2) { /* item is linked but not busy */
- if ((it->it_flags & ITEM_LINKED) != 0) {
- do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
- status = MOVE_DONE;
} else {
- /* refcount == 1 + !ITEM_LINKED means the item is being
- * uploaded to, or was just unlinked but hasn't been freed
- * yet. Let it bleed off on its own and try again later */
+ if (settings.verbose > 2) {
+ fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
+ it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
+ }
status = MOVE_BUSY;
}
- } else {
- if (settings.verbose > 2) {
- fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
- it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
- }
- status = MOVE_BUSY;
+ item_unlock(hv);
}
}
@@ -562,9 +568,10 @@ static int slab_rebalance_move(void) {
it->slabs_clsid = 255;
break;
case MOVE_BUSY:
+ refcount_decr(&it->refcount);
+ case MOVE_LOCKED:
slab_rebal.busy_items++;
was_busy++;
- refcount_decr(&it->refcount);
break;
case MOVE_PASS:
break;
diff --git a/thread.c b/thread.c
index f4bbe1e..93867a5 100644
--- a/thread.c
+++ b/thread.c
@@ -112,6 +112,10 @@ void item_lock(uint32_t hv) {
mutex_lock(&item_locks[hv & item_lock_mask]);
}
+int item_trylock(uint32_t hv) {
+ return pthread_mutex_trylock(&item_locks[hv & item_lock_mask]);
+}
+
void item_unlock(uint32_t hv) {
mutex_unlock(&item_locks[hv & item_lock_mask]);
}
@@ -381,7 +385,7 @@ int is_listen_thread() {
item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
item *it;
/* do_item_alloc handles its own locks */
- it = do_item_alloc(key, nkey, flags, exptime, nbytes);
+ it = do_item_alloc(key, nkey, flags, exptime, nbytes, 0);
return it;
}