summaryrefslogtreecommitdiff
path: root/src/intset.c
diff options
context:
space:
mode:
authorAlon Diamant <diamant.alon@gmail.com>2014-03-11 15:38:55 +0100
committerAlon Diamant <alon@everything.me>2014-12-14 12:25:42 +0200
commit288028876f4428edfc044d8a1f1d6784b0dbe739 (patch)
treed5e4e55d9f639f2ab4dea25afb92b16e88618f3f /src/intset.c
parentc147cd848760c44791d73b03dbe2a4b8aa5b8c8e (diff)
downloadredis-288028876f4428edfc044d8a1f1d6784b0dbe739.tar.gz
Added <count> parameter to SPOP:
spopCommand() now runs spopWithCountCommand() in case the <count> param is found. Added intsetRandomMembers() to Intset: Copies N random members from the set into inputted 'values' array. Uses either the Knuth or Floyd sample algos depending on ratio count/size. Added setTypeRandomElements() to SET type: Returns a number of random elements from a non empty set. This is a version of setTypeRandomElement() that is modified in order to return multiple entries, using dictGetRandomKeys() and intsetRandomMembers(). Added tests for SPOP with <count>: unit/type/set, unit/scripting, integration/aof -- Cleaned up code a bit to match with required Redis coding style
Diffstat (limited to 'src/intset.c')
-rw-r--r--src/intset.c103
1 files changed, 100 insertions, 3 deletions
diff --git a/src/intset.c b/src/intset.c
index 5d894e3cd..21a65994f 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -261,6 +261,100 @@ int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%intrev32ifbe(is->length));
}
+/* How many times bigger should the set length be compared to the requested
+ * count of members for us to use the Floyd algorithm instead of
+ the Knuth algorithm */
+#define RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO (2)
+
+/* Copies 'count' random members from the set into the 'values' array.
+ 'values' must be an array of int64_t values, of length 'count'.
+ Returns the amount of items returned. If this amount is less than 'count',
+ then the remaining 'values' are left uninitialized. */
+int intsetRandomMembers(intset *is, int64_t* values, int count) {
+
+ /* We don't check that is and values are non-NULL - the caller must
+ play nice. */
+ /* redisAssert(is != NULL);
+ redisAssert(values != NULL);
+ redisAssert(count > 0); */
+
+ int length = intsetLen(is);
+
+ if (count > length) {
+ /* Return everything in the set */
+ count = length;
+ }
+
+ /* Choose between the Knuth shuffle algorithm, O(1) space, O(length) time,
+ and the Floyd algorithm, O(length) space, O(count) time. */
+ if ((RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO * count) > length) {
+
+ /* If the count of members requested is almost the length of the set,
+ use the Knuth shuffle algorithm, O(1) space, O(length) time. */
+
+ /* First, fill the values array with unique random indexes inside
+ the set. */
+ int in, im, rn, rm;
+ im = 0;
+ for (in = 0;
+ in < length && im < count;
+ in++) {
+
+ rn = length - in;
+ rm = count - im;
+ if (rand() % rn < rm) {
+ values[im++] = in;
+ }
+ }
+
+ } else {
+
+ /* If the length is considerably more than the count of members
+ requested, use Robert Floyd's algorithm, O(length) space,
+ O(count) time.
+ Based on Jon Bentley's Programming Pearls */
+
+ int64_t *is_used = zcalloc(sizeof(int64_t) * length);
+ int in, im, r;
+
+ r = 0;
+ im = 0;
+
+ for (in = length - count;
+ in < length && im < count;
+ in++) {
+
+ /* Generate a random number r */
+ r = rand() % (in + 1);
+
+ /* Do we already have the value in r? */
+ if (is_used[r]) {
+
+ /* Use in instead of the generated number */
+ r = in;
+ }
+
+ values[im++] = r ;
+
+ /* Mark it as used */
+ is_used[r] = 1;
+ }
+
+ zfree(is_used);
+ }
+
+ /* Replace each random index with the value stored there in the intset */
+ uint8_t encoding = intrev32ifbe(is->encoding);
+ for (int currentValue = 0;
+ currentValue < count;
+ currentValue++) {
+ values[currentValue] =
+ _intsetGetEncoded(is, values[currentValue], encoding);
+ }
+
+ return count;
+}
+
/* Sets the value to the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
@@ -363,8 +457,10 @@ int main(int argc, char **argv) {
assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
- assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
- assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
+ assert(_intsetValueEncoding(-9223372036854775808ull) ==
+ INTSET_ENC_INT64);
+ assert(_intsetValueEncoding(+9223372036854775807ull) ==
+ INTSET_ENC_INT64);
ok();
}
@@ -461,7 +557,8 @@ int main(int argc, char **argv) {
start = usec();
for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
- printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
+ printf("%ld lookups, %ld element set, %lldusec\n",
+ num,size,usec()-start);
}
printf("Stress add+delete: "); {