summaryrefslogtreecommitdiff
path: root/src/intset.c
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2015-02-10 22:59:12 +0100
committerantirez <antirez@gmail.com>2015-02-11 10:52:28 +0100
commit9feee428f25a5681a06cd13ed1c4cc644759e719 (patch)
treeac3cc1ce859e91f7dffad04be400303065519166 /src/intset.c
parent55003f7a118beb20ed1bc65926e29e207c3b721a (diff)
downloadredis-9feee428f25a5681a06cd13ed1c4cc644759e719.tar.gz
SPOP: reimplemented for speed and better distribution.
The old version of SPOP with "count" argument used an API call of dict.c which was actually designed for a different goal, and was not capable of good distribution. We follow a different three-cases approach optimized for different ratiion between sets and requested number of elements. The implementation is simpler and allowed the removal of a large amount of code.
Diffstat (limited to 'src/intset.c')
-rw-r--r--src/intset.c84
1 files changed, 0 insertions, 84 deletions
diff --git a/src/intset.c b/src/intset.c
index 762bd48c8..b0a597fc7 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -261,90 +261,6 @@ int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%intrev32ifbe(is->length));
}
-/* How many times bigger should the set length be compared to the requested
- * count of members for us to use the Floyd algorithm instead of
- * the Knuth algorithm */
-#define RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO (2)
-
-/* Copies 'count' random members from the set into the 'values' array.
- * 'values' must be an array of int64_t values, of length 'count'.
- * Returns the amount of items returned. If this amount is less than 'count',
- * then the remaining 'values' are left uninitialized. */
-int intsetRandomMembers(intset *is, int64_t* values, int count) {
-
- /* We don't check that is and values are non-NULL - the caller must
- * play nice. */
-
- int length = intsetLen(is);
-
- if (count > length) {
- /* Return everything in the set */
- count = length;
- }
-
- /* Choose between the Knuth shuffle algorithm, O(1) space, O(length) time,
- * and the Floyd algorithm, O(length) space, O(count) time. */
- if ((RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO * count) > length) {
-
- /* If the count of members requested is almost the length of the set,
- * use the Knuth shuffle algorithm, O(1) space, O(length) time. */
-
- /* First, fill the values array with unique random indexes inside
- * the set. */
- int in, im, rn, rm;
- im = 0;
- for (in = 0; in < length && im < count; in++) {
-
- rn = length - in;
- rm = count - im;
- if (rand() % rn < rm) {
- values[im++] = in;
- }
- }
-
- } else {
-
- /* If the length is considerably more than the count of members
- * requested, use Robert Floyd's algorithm, O(length) space,
- * O(count) time.
- * Based on Jon Bentley's Programming Pearls */
-
- int64_t *is_used = zcalloc(sizeof(int64_t) * length);
- int in, im, r;
-
- r = 0;
- im = 0;
-
- for (in = length - count; in < length && im < count; in++) {
-
- /* Generate a random number r */
- r = rand() % (in + 1);
-
- /* Do we already have the value in r? */
- if (is_used[r]) {
- /* Use in instead of the generated number */
- r = in;
- }
-
- values[im++] = r ;
-
- /* Mark it as used */
- is_used[r] = 1;
- }
-
- zfree(is_used);
- }
-
- /* Replace each random index with the value stored there in the intset */
- uint8_t encoding = intrev32ifbe(is->encoding);
- for (int currentValue = 0; currentValue < count; currentValue++) {
- values[currentValue] =
- _intsetGetEncoded(is, values[currentValue], encoding);
- }
-
- return count;
-}
-
/* Sets the value to the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {