 items.c     | 119
 items.h     |   2
 memcached.c |   4
 memcached.h |   1
 slabs.c     |  63
 thread.c    |   6
 6 files changed, 111 insertions(+), 84 deletions(-)
diff --git a/items.c b/items.c
index 84011c4..3b26265 100644
--- a/items.c
+++ b/items.c
@@ -85,7 +85,9 @@ static size_t item_make_header(const uint8_t nkey, const int flags, const int nb
}
/*@null@*/
-item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
+item *do_item_alloc(char *key, const size_t nkey, const int flags,
+ const rel_time_t exptime, const int nbytes,
+ const uint32_t cur_hv) {
uint8_t nsuffix;
item *it = NULL;
char suffix[40];
@@ -100,13 +102,39 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
mutex_lock(&cache_lock);
/* do a quick check if we have any expired items in the tail.. */
+ int tries = 5;
+ int tried_alloc = 0;
item *search;
rel_time_t oldest_live = settings.oldest_live;
search = tails[id];
- if (search != NULL && (refcount_incr(&search->refcount) == 2)) {
+ /* We walk up *only* for locked items. Never searching for expired.
+ * Waste of CPU for almost all deployments */
+ for (; tries > 0 && search != NULL; tries--, search=search->prev) {
+ uint32_t hv = hash(ITEM_key(search), search->nkey, 0);
+ /* Attempt to hash item lock the "search" item. If locked, no
+ * other callers can incr the refcount
+ */
+ if (hv != cur_hv && item_trylock(hv) != 0)
+ continue;
+ /* Now see if the item is refcount locked */
+ if (refcount_incr(&search->refcount) != 2) {
+ refcount_decr(&search->refcount);
+ /* Old rare bug could cause a refcount leak. We haven't seen
+ * it in years, but we leave this code in to prevent failures
+ * just in case */
+ if (search->time + TAIL_REPAIR_TIME < current_time) {
+ itemstats[id].tailrepairs++;
+ search->refcount = 1;
+ do_item_unlink_nolock(search, hv);
+ }
+ item_unlock(hv);
+ continue;
+ }
+
+ /* Expired or flushed */
if ((search->exptime != 0 && search->exptime < current_time)
- || (search->time <= oldest_live && oldest_live <= current_time)) { // dead by flush
+ || (search->time <= oldest_live && oldest_live <= current_time)) {
STATS_LOCK();
stats.reclaimed++;
STATS_UNLOCK();
@@ -119,68 +147,55 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
}
it = search;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
- do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
+ do_item_unlink_nolock(it, hv);
/* Initialize the item block: */
it->slabs_clsid = 0;
} else if ((it = slabs_alloc(ntotal, id)) == NULL) {
+ tried_alloc = 1;
if (settings.evict_to_free == 0) {
itemstats[id].outofmemory++;
- mutex_unlock(&cache_lock);
- return NULL;
- }
- itemstats[id].evicted++;
- itemstats[id].evicted_time = current_time - search->time;
- if (search->exptime != 0)
- itemstats[id].evicted_nonzero++;
- if ((search->it_flags & ITEM_FETCHED) == 0) {
+ } else {
+ itemstats[id].evicted++;
+ itemstats[id].evicted_time = current_time - search->time;
+ if (search->exptime != 0)
+ itemstats[id].evicted_nonzero++;
+ if ((search->it_flags & ITEM_FETCHED) == 0) {
+ STATS_LOCK();
+ stats.evicted_unfetched++;
+ STATS_UNLOCK();
+ itemstats[id].evicted_unfetched++;
+ }
STATS_LOCK();
- stats.evicted_unfetched++;
+ stats.evictions++;
STATS_UNLOCK();
- itemstats[id].evicted_unfetched++;
+ it = search;
+ slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
+ do_item_unlink_nolock(it, hv);
+ /* Initialize the item block: */
+ it->slabs_clsid = 0;
+
+ /* If we've just evicted an item, and the automover is set to
+ * angry bird mode, attempt to rip memory into this slab class.
+ * TODO: Move valid object detection into a function, and on a
+ * "successful" memory pull, look behind and see if the next alloc
+ * would be an eviction. Then kick off the slab mover before the
+ * eviction happens.
+ */
+ if (settings.slab_automove == 2)
+ slabs_reassign(-1, id);
}
- STATS_LOCK();
- stats.evictions++;
- STATS_UNLOCK();
- it = search;
- slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
- do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
- /* Initialize the item block: */
- it->slabs_clsid = 0;
-
- /* If we've just evicted an item, and the automover is set to
- * angry bird mode, attempt to rip memory into this slab class.
- * TODO: Move valid object detection into a function, and on a
- * "successful" memory pull, look behind and see if the next alloc
- * would be an eviction. Then kick off the slab mover before the
- * eviction happens.
- */
- if (settings.slab_automove == 2)
- slabs_reassign(-1, id);
- } else {
- refcount_decr(&search->refcount);
}
- } else {
- /* If the LRU is empty or locked, attempt to allocate memory */
- it = slabs_alloc(ntotal, id);
- if (search != NULL)
- refcount_decr(&search->refcount);
+
+ refcount_decr(&search->refcount);
+ item_unlock(hv);
+ break;
}
+ if (!tried_alloc && (tries == 0 || search == NULL))
+ it = slabs_alloc(ntotal, id);
+
if (it == NULL) {
itemstats[id].outofmemory++;
- /* Last ditch effort. There was a very rare bug which caused
- * refcount leaks. We leave this just in case they ever happen again.
- * We can reasonably assume no item can stay locked for more than
- * three hours, so if we find one in the tail which is that old,
- * free it anyway.
- */
- if (search != NULL &&
- search->refcount != 2 &&
- search->time + TAIL_REPAIR_TIME < current_time) {
- itemstats[id].tailrepairs++;
- search->refcount = 1;
- do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
- }
mutex_unlock(&cache_lock);
return NULL;
}
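
The reworked do_item_alloc() tail walk above only skips entries whose per-item hash lock it cannot take or whose refcount shows another user; it does not go hunting for expired items beyond the few entries it touches. Below is a minimal, self-contained sketch of that trylock-and-skip pattern. All names here (toy_hash, node, find_victim) and the plain int refcount are illustrative stand-ins, not the real memcached structs, hash(), or refcount_incr()/refcount_decr().

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define N_LOCKS 16                         /* stand-in for item_lock_mask + 1 */
static pthread_mutex_t locks[N_LOCKS];

typedef struct node {
    struct node *prev;
    const char *key;
    int refcount;                          /* 1 == only the LRU references it */
} node;

/* Toy FNV-1a hash; the real code uses memcached's hash(). */
static uint32_t toy_hash(const char *k) {
    uint32_t h = 2166136261u;
    for (; *k; k++)
        h = (h ^ (uint8_t)*k) * 16777619u;
    return h;
}

/* Walk at most `tries` entries from the tail.  Skip any entry whose hash
 * bucket is locked by someone else; only inspect an entry once its bucket
 * lock is held (or is the caller's own bucket, cur_hv).  Return the first
 * entry nobody else is using, pinned, with its bucket lock still held. */
static node *find_victim(node *tail, int tries, uint32_t cur_hv) {
    node *search;
    for (search = tail; tries > 0 && search != NULL;
         tries--, search = search->prev) {
        uint32_t hv = toy_hash(search->key);
        if (hv != cur_hv && pthread_mutex_trylock(&locks[hv % N_LOCKS]) != 0)
            continue;                      /* bucket busy: skip, never block */
        if (search->refcount == 1) {       /* no one else holds it */
            search->refcount++;            /* pin it for the caller */
            return search;                 /* caller drops the bucket lock */
        }
        if (hv != cur_hv)
            pthread_mutex_unlock(&locks[hv % N_LOCKS]);
    }
    return NULL;                           /* nothing reclaimable; allocate fresh memory instead */
}

int main(void) {
    for (int i = 0; i < N_LOCKS; i++)
        pthread_mutex_init(&locks[i], NULL);

    node a = { NULL, "alpha", 1 };         /* idle entry: a valid victim */
    node b = { &a, "beta", 2 };            /* in use elsewhere: skipped */

    node *victim = find_victim(&b, 5, 0);
    printf("victim: %s\n", victim ? victim->key : "(none)");
    return 0;
}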
diff --git a/items.h b/items.h
index 2ec142d..4b25dcc 100644
--- a/items.h
+++ b/items.h
@@ -2,7 +2,7 @@
uint64_t get_cas_id(void);
/*@null@*/
-item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes);
+item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv);
void item_free(item *it);
bool item_size_ok(const size_t nkey, const int flags, const int nbytes);
diff --git a/memcached.c b/memcached.c
index 2cb0b20..71d5ff0 100644
--- a/memcached.c
+++ b/memcached.c
@@ -2325,7 +2325,7 @@ enum store_item_type do_store_item(item *it, int comm, conn *c, const uint32_t h
flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10);
- new_it = item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */);
+ new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */, hv);
if (new_it == NULL) {
/* SERVER_ERROR out of memory */
@@ -3101,7 +3101,7 @@ enum delta_result_type do_add_delta(conn *c, const char *key, const size_t nkey,
res = strlen(buf);
if (res + 2 > it->nbytes || it->refcount != 1) { /* need to realloc */
item *new_it;
- new_it = item_alloc(ITEM_key(it), it->nkey, atoi(ITEM_suffix(it) + 1), it->exptime, res + 2 );
+ new_it = do_item_alloc(ITEM_key(it), it->nkey, atoi(ITEM_suffix(it) + 1), it->exptime, res + 2, hv);
if (new_it == 0) {
do_item_remove(it);
return EOM;
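
The two call sites above already hold the per-item hash lock for hv when they need a replacement object, so they now call do_item_alloc() directly and hand that hv down as cur_hv; inside the allocator, the hv != cur_hv test keeps the tail walk from try-locking the bucket its caller is still holding. A small sketch of that hand-off follows, with purely hypothetical names (bucket_lock_of, alloc_may_walk_tail), not the real memcached functions.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define N_BUCKETS 8
static pthread_mutex_t bucket_lock[N_BUCKETS];

static pthread_mutex_t *bucket_lock_of(uint32_t hv) {
    return &bucket_lock[hv % N_BUCKETS];
}

/* The allocator may inspect items in other buckets, but must not re-lock the
 * bucket its caller already holds; that bucket's hash value is passed in. */
static void alloc_may_walk_tail(uint32_t cur_hv, uint32_t candidate_hv) {
    if (candidate_hv == cur_hv) {
        printf("bucket %u is the caller's own; already safe to touch\n",
               candidate_hv % N_BUCKETS);
        return;
    }
    if (pthread_mutex_trylock(bucket_lock_of(candidate_hv)) != 0) {
        printf("bucket %u busy, skipping\n", candidate_hv % N_BUCKETS);
        return;
    }
    printf("locked bucket %u, may reclaim from it\n", candidate_hv % N_BUCKETS);
    pthread_mutex_unlock(bucket_lock_of(candidate_hv));
}

int main(void) {
    for (int i = 0; i < N_BUCKETS; i++)
        pthread_mutex_init(&bucket_lock[i], NULL);

    uint32_t hv = 3;                        /* the caller's bucket */
    pthread_mutex_lock(bucket_lock_of(hv)); /* as do_store_item would hold it */
    alloc_may_walk_tail(hv, hv);            /* same bucket: no second lock taken */
    alloc_may_walk_tail(hv, 5);             /* different bucket: trylock it */
    pthread_mutex_unlock(bucket_lock_of(hv));
    return 0;
}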
diff --git a/memcached.h b/memcached.h
index 7774e0e..0517d4a 100644
--- a/memcached.h
+++ b/memcached.h
@@ -532,6 +532,7 @@ void item_unlink(item *it);
void item_update(item *it);
void item_lock(uint32_t hv);
+int item_trylock(uint32_t hv);
void item_unlock(uint32_t hv);
unsigned short refcount_incr(unsigned short *refcount);
unsigned short refcount_decr(unsigned short *refcount);
diff --git a/slabs.c b/slabs.c
index b74617e..71202fd 100644
--- a/slabs.c
+++ b/slabs.c
@@ -498,7 +498,7 @@ static int slab_rebalance_start(void) {
}
enum move_status {
- MOVE_PASS=0, MOVE_DONE, MOVE_BUSY
+ MOVE_PASS=0, MOVE_DONE, MOVE_BUSY, MOVE_LOCKED
};
/* refcount == 0 is safe since nobody can incr while cache_lock is held.
@@ -522,36 +522,42 @@ static int slab_rebalance_move(void) {
item *it = slab_rebal.slab_pos;
status = MOVE_PASS;
if (it->slabs_clsid != 255) {
- refcount = refcount_incr(&it->refcount);
- if (refcount == 1) { /* item is unlinked, unused */
- if (it->it_flags & ITEM_SLABBED) {
- /* remove from slab freelist */
- if (s_cls->slots == it) {
- s_cls->slots = it->next;
+ uint32_t hv = hash(ITEM_key(it), it->nkey, 0);
+ if (item_trylock(hv) != 0) {
+ status = MOVE_LOCKED;
+ } else {
+ refcount = refcount_incr(&it->refcount);
+ if (refcount == 1) { /* item is unlinked, unused */
+ if (it->it_flags & ITEM_SLABBED) {
+ /* remove from slab freelist */
+ if (s_cls->slots == it) {
+ s_cls->slots = it->next;
+ }
+ if (it->next) it->next->prev = it->prev;
+ if (it->prev) it->prev->next = it->next;
+ s_cls->sl_curr--;
+ status = MOVE_DONE;
+ } else {
+ status = MOVE_BUSY;
+ }
+ } else if (refcount == 2) { /* item is linked but not busy */
+ if ((it->it_flags & ITEM_LINKED) != 0) {
+ do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
+ status = MOVE_DONE;
+ } else {
+ /* refcount == 1 + !ITEM_LINKED means the item is being
+ * uploaded to, or was just unlinked but hasn't been freed
+ * yet. Let it bleed off on its own and try again later */
+ status = MOVE_BUSY;
}
- if (it->next) it->next->prev = it->prev;
- if (it->prev) it->prev->next = it->next;
- s_cls->sl_curr--;
- status = MOVE_DONE;
- } else {
- status = MOVE_BUSY;
- }
- } else if (refcount == 2) { /* item is linked but not busy */
- if ((it->it_flags & ITEM_LINKED) != 0) {
- do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
- status = MOVE_DONE;
} else {
- /* refcount == 1 + !ITEM_LINKED means the item is being
- * uploaded to, or was just unlinked but hasn't been freed
- * yet. Let it bleed off on its own and try again later */
+ if (settings.verbose > 2) {
+ fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
+ it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
+ }
status = MOVE_BUSY;
}
- } else {
- if (settings.verbose > 2) {
- fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
- it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
- }
- status = MOVE_BUSY;
+ item_unlock(hv);
}
}
@@ -562,9 +568,10 @@ static int slab_rebalance_move(void) {
it->slabs_clsid = 255;
break;
case MOVE_BUSY:
+ refcount_decr(&it->refcount);
+ case MOVE_LOCKED:
slab_rebal.busy_items++;
was_busy++;
- refcount_decr(&it->refcount);
break;
case MOVE_PASS:
break;
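
In the slabs.c hunk above, MOVE_BUSY now undoes the speculative refcount_incr() and then falls through into the new MOVE_LOCKED case, which only counts the retry; a locked item never had its refcount bumped, so there is nothing to undo for it. A stand-alone sketch of that fall-through, with simplified globals in place of the real slab_rebal state:

#include <stdio.h>

enum move_status { MOVE_PASS = 0, MOVE_DONE, MOVE_BUSY, MOVE_LOCKED };

static int refcount = 2;      /* pretend refcount_incr() already ran */
static int busy_items = 0;

static int handle(enum move_status status) {
    int was_busy = 0;
    switch (status) {
    case MOVE_DONE:
        /* item fully reclaimed; nothing to undo */
        break;
    case MOVE_BUSY:
        refcount--;           /* undo the speculative refcount_incr() */
        /* fall through: a busy item is retried just like a locked one */
    case MOVE_LOCKED:
        busy_items++;
        was_busy++;
        break;
    case MOVE_PASS:
        break;
    }
    return was_busy;
}

int main(void) {
    printf("busy: %d, refcount now %d\n", handle(MOVE_BUSY), refcount);
    printf("busy: %d (locked item, refcount untouched: %d)\n",
           handle(MOVE_LOCKED), refcount);
    return 0;
}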
diff --git a/thread.c b/thread.c
index f4bbe1e..93867a5 100644
--- a/thread.c
+++ b/thread.c
@@ -112,6 +112,10 @@ void item_lock(uint32_t hv) {
mutex_lock(&item_locks[hv & item_lock_mask]);
}
+int item_trylock(uint32_t hv) {
+ return pthread_mutex_trylock(&item_locks[hv & item_lock_mask]);
+}
+
void item_unlock(uint32_t hv) {
mutex_unlock(&item_locks[hv & item_lock_mask]);
}
@@ -381,7 +385,7 @@ int is_listen_thread() {
item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
item *it;
/* do_item_alloc handles its own locks */
- it = do_item_alloc(key, nkey, flags, exptime, nbytes);
+ it = do_item_alloc(key, nkey, flags, exptime, nbytes, 0);
return it;
}
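
item_trylock() is a thin wrapper around pthread_mutex_trylock(), which returns 0 on success and a non-zero error (EBUSY) when the mutex is already held; that is why the new callers test item_trylock(hv) != 0. A stand-alone demo of that return-value convention, assuming a default POSIX mutex (not memcached code):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

int main(void) {
    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

    pthread_mutex_lock(&m);                 /* first holder */
    int rc = pthread_mutex_trylock(&m);     /* second attempt must not block */
    printf("trylock while held: rc=%d (EBUSY=%d)\n", rc, EBUSY);

    pthread_mutex_unlock(&m);
    rc = pthread_mutex_trylock(&m);         /* now the mutex is free */
    printf("trylock when free:  rc=%d\n", rc);
    pthread_mutex_unlock(&m);
    return 0;
}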