summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-10-04 22:50:58 -0700
committerSage Weil <sage@inktank.com>2013-10-04 22:57:35 -0700
commitaaabc657396255101cb3311fb80f84e26fab3705 (patch)
treef28ca563caa91daa4cfa90d3b09116a82004f5f5
parent04091f87dd3c56c63cb940839db8b7699aeb1780 (diff)
downloadceph-aaabc657396255101cb3311fb80f84e26fab3705.tar.gz
common/bloom_filter: methods for density, approx unique element counts
Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/common/bloom_filter.hpp25
-rw-r--r--src/test/common/test_bloom_filter.cc5
2 files changed, 28 insertions, 2 deletions
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp
index c336eef8b6a..4b711526676 100644
--- a/src/common/bloom_filter.hpp
+++ b/src/common/bloom_filter.hpp
@@ -296,6 +296,31 @@ public:
return insert_count_;
}
+ /*
+ * density of bits set. inconvenient units, but:
+ * .3 = ~50% target insertions
+ * .5 = 100% target insertions, "perfectly full"
+ * .75 = 200% target insertions
+ * 1.0 = all bits set... infinite insertions
+ */
+ inline double density() const
+ {
+ size_t set = 0;
+ uint8_t *p = bit_table_;
+ size_t left = table_size_;
+ while (left-- > 0) {
+ uint8_t c = *p;
+ for (; c; ++set)
+ c &= c - 1;
+ ++p;
+ }
+ return (double)set / (double)(table_size_ << 3);
+ }
+
+ inline double approx_unique_element_count() const {
+ // this is not a very good estimate; a better solution should have
+ // some asymptotic behavior as density() approaches 1.0.
+ return (double)target_element_count_ * 2.0 * density();
}
inline double effective_fpp() const
diff --git a/src/test/common/test_bloom_filter.cc b/src/test/common/test_bloom_filter.cc
index 8e3661b2cc1..5edf3f0f9fa 100644
--- a/src/test/common/test_bloom_filter.cc
+++ b/src/test/common/test_bloom_filter.cc
@@ -62,7 +62,7 @@ TEST(BloomFilter, Sweep) {
}
TEST(BloomFilter, SweepInt) {
- std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl;
+ std::cout << "# max\tfpp\tactual\tsize\tB/insert\tdensity\tapprox_element_count" << std::endl;
for (int ex = 3; ex < 12; ex += 2) {
for (float fpp = .001; fpp < .5; fpp *= 4.0) {
int max = 2 << ex;
@@ -92,7 +92,8 @@ TEST(BloomFilter, SweepInt) {
double byte_per_insert = (double)bl.length() / (double)max;
- std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl;
+ std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert
+ << "\t" << bf.density() << "\t" << bf.approx_unique_element_count() << std::endl;
ASSERT_TRUE(actual < fpp * 10);
ASSERT_TRUE(actual > fpp / 10);
}