diff options
author | Alon Diamant <diamant.alon@gmail.com> | 2014-03-11 15:38:55 +0100 |
---|---|---|
committer | Alon Diamant <alon@everything.me> | 2014-12-14 12:25:42 +0200 |
commit | 288028876f4428edfc044d8a1f1d6784b0dbe739 (patch) | |
tree | d5e4e55d9f639f2ab4dea25afb92b16e88618f3f /src/intset.c | |
parent | c147cd848760c44791d73b03dbe2a4b8aa5b8c8e (diff) | |
download | redis-288028876f4428edfc044d8a1f1d6784b0dbe739.tar.gz |
Added <count> parameter to SPOP:
spopCommand() now runs spopWithCountCommand() in case the <count> param is found.
Added intsetRandomMembers() to Intset: Copies N random members from the set into inputted 'values' array. Uses either the Knuth or Floyd sample algos depending on ratio count/size.
Added setTypeRandomElements() to SET type: Returns a number of random elements from a non empty set. This is a version of setTypeRandomElement() that is modified in order to return multiple entries, using dictGetRandomKeys() and intsetRandomMembers().
Added tests for SPOP with <count>: unit/type/set, unit/scripting, integration/aof
--
Cleaned up code a bit to match with required Redis coding style
Diffstat (limited to 'src/intset.c')
-rw-r--r-- | src/intset.c | 103 |
1 files changed, 100 insertions, 3 deletions
diff --git a/src/intset.c b/src/intset.c index 5d894e3cd..21a65994f 100644 --- a/src/intset.c +++ b/src/intset.c @@ -261,6 +261,100 @@ int64_t intsetRandom(intset *is) { return _intsetGet(is,rand()%intrev32ifbe(is->length)); } +/* How many times bigger should the set length be compared to the requested + * count of members for us to use the Floyd algorithm instead of + the Knuth algorithm */ +#define RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO (2) + +/* Copies 'count' random members from the set into the 'values' array. + 'values' must be an array of int64_t values, of length 'count'. + Returns the amount of items returned. If this amount is less than 'count', + then the remaining 'values' are left uninitialized. */ +int intsetRandomMembers(intset *is, int64_t* values, int count) { + + /* We don't check that is and values are non-NULL - the caller must + play nice. */ + /* redisAssert(is != NULL); + redisAssert(values != NULL); + redisAssert(count > 0); */ + + int length = intsetLen(is); + + if (count > length) { + /* Return everything in the set */ + count = length; + } + + /* Choose between the Knuth shuffle algorithm, O(1) space, O(length) time, + and the Floyd algorithm, O(length) space, O(count) time. */ + if ((RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO * count) > length) { + + /* If the count of members requested is almost the length of the set, + use the Knuth shuffle algorithm, O(1) space, O(length) time. */ + + /* First, fill the values array with unique random indexes inside + the set. */ + int in, im, rn, rm; + im = 0; + for (in = 0; + in < length && im < count; + in++) { + + rn = length - in; + rm = count - im; + if (rand() % rn < rm) { + values[im++] = in; + } + } + + } else { + + /* If the length is considerably more than the count of members + requested, use Robert Floyd's algorithm, O(length) space, + O(count) time. + Based on Jon Bentley's Programming Pearls */ + + int64_t *is_used = zcalloc(sizeof(int64_t) * length); + int in, im, r; + + r = 0; + im = 0; + + for (in = length - count; + in < length && im < count; + in++) { + + /* Generate a random number r */ + r = rand() % (in + 1); + + /* Do we already have the value in r? */ + if (is_used[r]) { + + /* Use in instead of the generated number */ + r = in; + } + + values[im++] = r ; + + /* Mark it as used */ + is_used[r] = 1; + } + + zfree(is_used); + } + + /* Replace each random index with the value stored there in the intset */ + uint8_t encoding = intrev32ifbe(is->encoding); + for (int currentValue = 0; + currentValue < count; + currentValue++) { + values[currentValue] = + _intsetGetEncoded(is, values[currentValue], encoding); + } + + return count; +} + /* Sets the value to the value at the given position. When this position is * out of range the function returns 0, when in range it returns 1. */ uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) { @@ -363,8 +457,10 @@ int main(int argc, char **argv) { assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32); assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64); assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64); - assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64); - assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64); + assert(_intsetValueEncoding(-9223372036854775808ull) == + INTSET_ENC_INT64); + assert(_intsetValueEncoding(+9223372036854775807ull) == + INTSET_ENC_INT64); ok(); } @@ -461,7 +557,8 @@ int main(int argc, char **argv) { start = usec(); for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL); - printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start); + printf("%ld lookups, %ld element set, %lldusec\n", + num,size,usec()-start); } printf("Stress add+delete: "); { |