diff options
author | dormando <dormando@rydia.net> | 2011-12-15 17:18:43 -0800 |
---|---|---|
committer | dormando <dormando@rydia.net> | 2011-12-15 17:18:43 -0800 |
commit | 40b7b4b2b62d67062eb536cb5395e74172c0c31c (patch) | |
tree | 50a79b9ee1ab8fec882756f685a7f73390aeaaa0 | |
parent | f58de2a4b115bfc00ba24e42244d26ee561cde3a (diff) | |
download | memcached-40b7b4b2b62d67062eb536cb5395e74172c0c31c.tar.gz |
clean do_item_get logic a bit. fix race.
the race here is absolutely insane:
- do_item_get and do_item_alloc call at the same time, against different
items
- do_item_get wins cache_lock lock race, returns item for internal testing
- do_item_alloc runs next, pulls item off of tail of a slab class which is the
same item as do_item_get just got
- do_item_alloc sees refcount == 0 since do_item_get incrs it at the bottom,
and starts messing with the item
- do_item_get runs its tests and maybe even refcount++'s and returns the item
- evil shit happens.
This race is much more likely to hit during the slab reallocation work, so I'm
fixing it even though it's almost impossible to cause.
Also cleaned up the logic so it's not testing the item for NULL more than
once. Far fewer branches now, though I did not examine gcc's output to see if
it is optimized differently.
-rw-r--r-- | items.c | 56 |
1 files changed, 28 insertions, 28 deletions
@@ -158,9 +158,8 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim if (it == NULL) { itemstats[id].outofmemory++; - /* Last ditch effort. There is a very rare bug which causes - * refcount leaks. We've fixed most of them, but it still happens, - * and it may happen in the future. + /* Last ditch effort. There was a very rare bug which caused + * refcount leaks. We leave this just in case they ever happen again. * We can reasonably assume no item can stay locked for more than * three hours, so if we find one in the tail which is that old, * free it anyway. @@ -310,7 +309,7 @@ void do_item_unlink(item *it, const uint32_t hv) { } } -/* FIXME: Is it necessary to keep thsi copy/pasted code? */ +/* FIXME: Is it necessary to keep this copy/pasted code? */ void do_item_unlink_nolock(item *it, const uint32_t hv) { MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes); if ((it->it_flags & ITEM_LINKED) != 0) { @@ -478,6 +477,8 @@ void do_item_stats_sizes(ADD_STAT add_stats, void *c) { item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) { mutex_lock(&cache_lock); item *it = assoc_find(key, nkey, hv); + if (it != NULL) + it->refcount++; pthread_mutex_unlock(&cache_lock); int was_found = 0; @@ -490,31 +491,30 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) { } } - if (it != NULL && settings.oldest_live != 0 && settings.oldest_live <= current_time && - it->time <= settings.oldest_live) { - do_item_unlink(it, hv); /* MTSAFE - item_lock held */ - it = NULL; - } - - if (it == NULL && was_found) { - fprintf(stderr, " -nuked by flush"); - was_found--; - } - - if (it != NULL && it->exptime != 0 && it->exptime <= current_time) { - do_item_unlink(it, hv); /* MTSAFE - item_lock held */ - it = NULL; - } - - if (it == NULL && was_found) { - fprintf(stderr, " -nuked by expire"); - was_found--; - } - if (it != NULL) { - it->refcount++; - it->it_flags |= ITEM_FETCHED; - DEBUG_REFCNT(it, '+'); + if (settings.oldest_live != 0 && settings.oldest_live <= current_time && + it->time <= settings.oldest_live) { + mutex_lock(&cache_lock); + it->refcount--; + do_item_unlink_nolock(it, hv); + pthread_mutex_unlock(&cache_lock); + it = NULL; + if (was_found) { + fprintf(stderr, " -nuked by flush"); + } + } else if (it->exptime != 0 && it->exptime <= current_time) { + mutex_lock(&cache_lock); + it->refcount--; + do_item_unlink_nolock(it, hv); + pthread_mutex_unlock(&cache_lock); + it = NULL; + if (was_found) { + fprintf(stderr, " -nuked by expire"); + } + } else { + it->it_flags |= ITEM_FETCHED; + DEBUG_REFCNT(it, '+'); + } } if (settings.verbose > 2) |