summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: dormando <dormando@rydia.net> 2012-01-08 20:27:07 -0800
committer: dormando <dormando@rydia.net> 2012-01-08 20:27:07 -0800
commit: f4983b2068d13e5dc71fc075c35a085e904999cf (patch)
tree: 545be5c84974c715411cbefa33cfbe07ad7af7a4
parent: 324975cee252e7d28496a238334c22eba6d7cda1 (diff)
download: memcached-f4983b2068d13e5dc71fc075c35a085e904999cf.tar.gz
Fix a race condition from 1.4.10 on item_remove (tag: 1.4.11-beta1)
Updates the slab mover for the new method. 1.4.10 lacks some crucial protection around item freeing and removal, resulting in some potential crashes. Moving the cache_lock around item_remove caused a 30% performance drop, so it's been reimplemented with GCC atomics. A refcount of 1 now means an item is linked but has no reference, which allows us to test an atomic sub and fetch of 0 as a clear indicator of when to free an item.
-rw-r--r--  items.c | 44
-rw-r--r--  slabs.c | 75
2 files changed, 71 insertions(+), 48 deletions(-)
diff --git a/items.c b/items.c
index 174b50f..faf43b5 100644
--- a/items.c
+++ b/items.c
@@ -104,7 +104,7 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
rel_time_t oldest_live = settings.oldest_live;
search = tails[id];
- if (search != NULL && search->refcount == 0) {
+ if (search != NULL && (__sync_add_and_fetch(&search->refcount, 1) == 2)) {
if ((search->exptime != 0 && search->exptime < current_time)
|| (search->time < oldest_live)) { // dead by flush
STATS_LOCK();
@@ -118,7 +118,6 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
itemstats[id].expired_unfetched++;
}
it = search;
- it->refcount = 1;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
/* Initialize the item block: */
@@ -143,15 +142,18 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
stats.evictions++;
STATS_UNLOCK();
it = search;
- it->refcount = 1;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
/* Initialize the item block: */
it->slabs_clsid = 0;
+ } else {
+ __sync_sub_and_fetch(&search->refcount, 1);
}
} else {
/* If the LRU is empty or locked, attempt to allocate memory */
it = slabs_alloc(ntotal, id);
+ if (search != NULL)
+ __sync_sub_and_fetch(&search->refcount, 1);
}
if (it == NULL) {
@@ -163,10 +165,10 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
* free it anyway.
*/
if (search != NULL &&
- search->refcount != 0 &&
+ search->refcount != 2 &&
search->time + TAIL_REPAIR_TIME < current_time) {
itemstats[id].tailrepairs++;
- search->refcount = 0;
+ search->refcount = 1;
do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
}
pthread_mutex_unlock(&cache_lock);
@@ -206,7 +208,6 @@ void item_free(item *it) {
/* so slab size changer can tell later if item is already free or not */
clsid = it->slabs_clsid;
it->slabs_clsid = 0;
- it->it_flags |= ITEM_SLABBED;
DEBUG_REFCNT(it, 'F');
slabs_free(it, ntotal, clsid);
}
@@ -272,6 +273,7 @@ static void item_unlink_q(item *it) {
int do_item_link(item *it, const uint32_t hv) {
MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
+ mutex_lock(&cache_lock);
it->it_flags |= ITEM_LINKED;
it->time = current_time;
@@ -281,11 +283,11 @@ int do_item_link(item *it, const uint32_t hv) {
stats.total_items += 1;
STATS_UNLOCK();
- mutex_lock(&cache_lock);
/* Allocate a new CAS ID on link. */
ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
assoc_insert(it, hv);
item_link_q(it);
+ __sync_add_and_fetch(&it->refcount, 1);
pthread_mutex_unlock(&cache_lock);
return 1;
@@ -302,7 +304,7 @@ void do_item_unlink(item *it, const uint32_t hv) {
STATS_UNLOCK();
assoc_delete(ITEM_key(it), it->nkey, hv);
item_unlink_q(it);
- if (it->refcount == 0) item_free(it);
+ do_item_remove(it);
}
pthread_mutex_unlock(&cache_lock);
}
@@ -318,7 +320,7 @@ void do_item_unlink_nolock(item *it, const uint32_t hv) {
STATS_UNLOCK();
assoc_delete(ITEM_key(it), it->nkey, hv);
item_unlink_q(it);
- if (it->refcount == 0) item_free(it);
+ do_item_remove(it);
}
}
@@ -326,15 +328,9 @@ void do_item_remove(item *it) {
MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
assert((it->it_flags & ITEM_SLABBED) == 0);
- mutex_lock(&cache_lock);
- if (it->refcount != 0) {
- it->refcount--;
- DEBUG_REFCNT(it, '-');
- }
- if (it->refcount == 0 && (it->it_flags & ITEM_LINKED) == 0) {
+ if (__sync_sub_and_fetch(&it->refcount, 1) == 0) {
item_free(it);
}
- pthread_mutex_unlock(&cache_lock);
}
void do_item_update(item *it) {
@@ -488,14 +484,14 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
mutex_lock(&cache_lock);
item *it = assoc_find(key, nkey, hv);
if (it != NULL) {
- it->refcount++;
+ __sync_add_and_fetch(&it->refcount, 1);
/* Optimization for slab reassignment. prevents popular items from
* jamming in busy wait. Can only do this here to satisfy lock order
* of item_lock, cache_lock, slabs_lock. */
if (slab_rebalance_signal &&
((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
- it->refcount--;
do_item_unlink_nolock(it, hv);
+ do_item_remove(it);
it = NULL;
}
}
@@ -514,19 +510,15 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
if (it != NULL) {
if (settings.oldest_live != 0 && settings.oldest_live <= current_time &&
it->time <= settings.oldest_live) {
- mutex_lock(&cache_lock);
- it->refcount--;
- do_item_unlink_nolock(it, hv);
- pthread_mutex_unlock(&cache_lock);
+ do_item_unlink(it, hv);
+ do_item_remove(it);
it = NULL;
if (was_found) {
fprintf(stderr, " -nuked by flush");
}
} else if (it->exptime != 0 && it->exptime <= current_time) {
- mutex_lock(&cache_lock);
- it->refcount--;
- do_item_unlink_nolock(it, hv);
- pthread_mutex_unlock(&cache_lock);
+ do_item_unlink(it, hv);
+ do_item_remove(it);
it = NULL;
if (was_found) {
fprintf(stderr, " -nuked by expire");
diff --git a/slabs.c b/slabs.c
index 6029c34..a2757d0 100644
--- a/slabs.c
+++ b/slabs.c
@@ -292,6 +292,7 @@ static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
#endif
it = (item *)ptr;
+ it->it_flags |= ITEM_SLABBED;
it->prev = 0;
it->next = p->slots;
if (it->next) it->next->prev = it;
@@ -521,6 +522,10 @@ static int slab_rebalance_start(void) {
return 0;
}
+enum move_status {
+ MOVE_PASS=0, MOVE_DONE, MOVE_BUSY
+};
+
/* refcount == 0 is safe since nobody can incr while cache_lock is held.
* refcount != 0 is impossible since flags/etc can be modified in other
* threads. instead, note we found a busy one and bail. logic in do_item_get
@@ -530,6 +535,8 @@ static int slab_rebalance_move(void) {
slabclass_t *s_cls;
int x;
int was_busy = 0;
+ int refcount = 0;
+ enum move_status status = MOVE_PASS;
pthread_mutex_lock(&cache_lock);
pthread_mutex_lock(&slabs_lock);
@@ -538,30 +545,54 @@ static int slab_rebalance_move(void) {
for (x = 0; x < slab_bulk_check; x++) {
item *it = slab_rebal.slab_pos;
- if (it->refcount == 0) {
- if (it->it_flags & ITEM_SLABBED) {
- /* remove from freelist linked list */
- if (s_cls->slots == it) {
- s_cls->slots = it->next;
+ status = MOVE_PASS;
+ if (it->slabs_clsid != 255) {
+ refcount = __sync_add_and_fetch(&it->refcount, 1);
+ if (refcount == 1) { /* item is unlinked, unused */
+ if (it->it_flags & ITEM_SLABBED) {
+ /* remove from slab freelist */
+ if (s_cls->slots == it) {
+ s_cls->slots = it->next;
+ }
+ if (it->next) it->next->prev = it->prev;
+ if (it->prev) it->prev->next = it->next;
+ s_cls->sl_curr--;
+ status = MOVE_DONE;
+ } else {
+ status = MOVE_BUSY;
}
- if (it->next) it->next->prev = it->prev;
- if (it->prev) it->prev->next = it->next;
- s_cls->sl_curr--;
- } else if (it->it_flags != 0) {
- it->refcount = 1;
- /* Call unlink with refcount == 1 so it won't free */
- do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
- it->refcount = 0;
- }
- it->it_flags = 0;
- it->slabs_clsid = 0;
- } else {
- if (settings.verbose > 2) {
- fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
- it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
+ } else if (refcount == 2) { /* item is linked but not busy */
+ if ((it->it_flags & ITEM_LINKED) != 0) {
+ do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
+ status = MOVE_DONE;
+ } else {
+ /* refcount == 1 + !ITEM_LINKED means the item is being
+ * uploaded to, or was just unlinked but hasn't been freed
+ * yet. Let it bleed off on its own and try again later */
+ status = MOVE_BUSY;
+ }
+ } else {
+ if (settings.verbose > 2) {
+ fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
+ it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
+ }
+ status = MOVE_BUSY;
}
- slab_rebal.busy_items++;
- was_busy++;
+ }
+
+ switch (status) {
+ case MOVE_DONE:
+ it->refcount = 0;
+ it->it_flags = 0;
+ it->slabs_clsid = 255;
+ break;
+ case MOVE_BUSY:
+ slab_rebal.busy_items++;
+ was_busy++;
+ __sync_sub_and_fetch(&it->refcount, 1);
+ break;
+ case MOVE_PASS:
+ break;
}
slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;