summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-09-19 20:02:50 -0700
committerSage Weil <sage@inktank.com>2013-09-19 20:02:50 -0700
commit19cc85e3e9ff60c1830fae06eadf4c6ca2e8edc0 (patch)
treed220ee2280d703fbff3c51540a2b49e7fd102c8f
parent535d9d39daf79fcc5867465672f9b17760e8bb51 (diff)
downloadceph-wip-bloom.tar.gz
common/bloom_filter: insert/contains methods for uint32_twip-bloom
This will let us pass in an hobject_t::hash directly (for example) without rehashing a string. Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/common/bloom_filter.hpp35
-rw-r--r--src/test/common/test_bloom_filter.cc38
2 files changed, 73 insertions, 0 deletions
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp
index 15400b14b9e..6d5f645d8c9 100644
--- a/src/common/bloom_filter.hpp
+++ b/src/common/bloom_filter.hpp
@@ -131,6 +131,17 @@ public:
inserted_element_count_ = 0;
}
+ inline void insert(uint32_t val) {
+ std::size_t bit_index = 0;
+ std::size_t bit = 0;
+ for (std::size_t i = 0; i < salt_.size(); ++i)
+ {
+ compute_indices(hash_ap(val,salt_[i]),bit_index,bit);
+ bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
+ }
+ ++inserted_element_count_;
+ }
+
inline void insert(const unsigned char* key_begin, const std::size_t& length)
{
std::size_t bit_index = 0;
@@ -170,6 +181,21 @@ public:
}
}
+ inline virtual bool contains(uint32_t val) const
+ {
+ std::size_t bit_index = 0;
+ std::size_t bit = 0;
+ for (std::size_t i = 0; i < salt_.size(); ++i)
+ {
+ compute_indices(hash_ap(val,salt_[i]),bit_index,bit);
+ if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const
{
std::size_t bit_index = 0;
@@ -425,6 +451,15 @@ protected:
*table_size = t;
}
+ inline bloom_type hash_ap(uint32_t val, bloom_type hash) const
+ {
+ hash ^= (hash << 7) ^ ((val & 0xff000000) >> 24) * (hash >> 3);
+ hash ^= (~((hash << 11) + (((val & 0xff0000) >> 16) ^ (hash >> 5))));
+ hash ^= (hash << 7) ^ ((val & 0xff00) >> 8) * (hash >> 3);
+ hash ^= (~((hash << 11) + (((val & 0xff)) ^ (hash >> 5))));
+ return hash;
+ }
+
inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const
{
const unsigned char* itr = begin;
diff --git a/src/test/common/test_bloom_filter.cc b/src/test/common/test_bloom_filter.cc
index a8935ff6f93..9ef2dac14c5 100644
--- a/src/test/common/test_bloom_filter.cc
+++ b/src/test/common/test_bloom_filter.cc
@@ -61,6 +61,44 @@ TEST(BloomFilter, Sweep) {
}
}
+TEST(BloomFilter, SweepInt) {
+ std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl;
+ for (int ex = 3; ex < 12; ex++) {
+ for (float fpp = .001; fpp < .5; fpp *= 2.0) {
+ int max = 2 << ex;
+ bloom_filter bf(max, fpp, 1);
+ bf.insert("foo");
+ bf.insert("bar");
+
+ ASSERT_TRUE(123);
+ ASSERT_TRUE(456);
+
+ for (int n = 0; n < max; n++)
+ bf.insert(n);
+
+ int test = max * 100;
+ int hit = 0;
+ for (int n = 0; n < test; n++)
+ if (bf.contains(100000 + n))
+ hit++;
+
+ ASSERT_TRUE(123);
+ ASSERT_TRUE(456);
+
+ double actual = (double)hit / (double)test;
+
+ bufferlist bl;
+ ::encode(bf, bl);
+
+ double byte_per_insert = (double)bl.length() / (double)max;
+
+ std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl;
+ ASSERT_TRUE(actual < fpp * 10);
+
+ }
+ }
+}
+
// test the fpp over a sequence of bloom filters, each with unique
// items inserted into it.
//