summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-10-04 22:49:41 -0700
committerSage Weil <sage@inktank.com>2013-10-06 10:18:35 -0700
commitf45c3b98d3941c242d428c971b494fdc5216bd18 (patch)
tree780e8bf636833289935e28f4c517155a1fd1b6e1
parent055dc4c1f22bf7751059d589c33ba5c32b009184 (diff)
downloadceph-f45c3b98d3941c242d428c971b494fdc5216bd18.tar.gz
common/bloom_filter: fix compress; improve argument
Fix bug in compress() when compressing to less than .5 or original. Make the argument have sane units (target size relative to current size; not a precentage reduction). Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/common/bloom_filter.hpp10
-rw-r--r--src/test/common/test_bloom_filter.cc58
2 files changed, 64 insertions, 4 deletions
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp
index 8587caa2d8d..3e55687774b 100644
--- a/src/common/bloom_filter.hpp
+++ b/src/common/bloom_filter.hpp
@@ -603,15 +603,15 @@ public:
return size_list.back() * bits_per_char;
}
- inline bool compress(const double& percentage)
+ inline bool compress(const double& target_ratio)
{
- if ((0.0 >= percentage) || (percentage >= 100.0))
+ if ((0.0 >= target_ratio) || (target_ratio >= 1.0))
{
return false;
}
std::size_t original_table_size = size_list.back();
- std::size_t new_table_size = static_cast<std::size_t>((size_list.back() * (1.0 - (percentage / 100.0))));
+ std::size_t new_table_size = static_cast<std::size_t>(size_list.back() * target_ratio);
if ((!new_table_size) || (new_table_size >= original_table_size))
{
@@ -623,10 +623,12 @@ public:
cell_type* itr = bit_table_ + (new_table_size);
cell_type* end = bit_table_ + (original_table_size);
cell_type* itr_tmp = tmp;
-
+ cell_type* itr_end = tmp + (new_table_size);
while (end != itr)
{
*(itr_tmp++) |= (*itr++);
+ if (itr_tmp == itr_end)
+ itr_tmp = tmp;
}
delete[] bit_table_;
diff --git a/src/test/common/test_bloom_filter.cc b/src/test/common/test_bloom_filter.cc
index 5edf3f0f9fa..71d3de97d2a 100644
--- a/src/test/common/test_bloom_filter.cc
+++ b/src/test/common/test_bloom_filter.cc
@@ -24,6 +24,8 @@ TEST(BloomFilter, Basic) {
}
TEST(BloomFilter, Sweep) {
+ std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ std::cout.precision(5);
std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl;
for (int ex = 3; ex < 12; ex += 2) {
for (float fpp = .001; fpp < .5; fpp *= 4.0) {
@@ -62,6 +64,8 @@ TEST(BloomFilter, Sweep) {
}
TEST(BloomFilter, SweepInt) {
+ std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ std::cout.precision(5);
std::cout << "# max\tfpp\tactual\tsize\tB/insert\tdensity\tapprox_element_count" << std::endl;
for (int ex = 3; ex < 12; ex += 2) {
for (float fpp = .001; fpp < .5; fpp *= 4.0) {
@@ -96,12 +100,66 @@ TEST(BloomFilter, SweepInt) {
<< "\t" << bf.density() << "\t" << bf.approx_unique_element_count() << std::endl;
ASSERT_TRUE(actual < fpp * 10);
ASSERT_TRUE(actual > fpp / 10);
+ ASSERT_TRUE(bf.density() > 0.40);
+ ASSERT_TRUE(bf.density() < 0.60);
}
}
}
+TEST(BloomFilter, CompressibleSweep) {
+ std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ std::cout.precision(5);
+ std::cout << "# max\tins\test ins\tafter\ttgtfpp\tactual\tsize\tb/elem\n";
+ float fpp = .01;
+ int max = 1024;
+ for (int div = 1; div < 10; div++) {
+ compressible_bloom_filter bf(max, fpp, 1);
+ int t = max/div;
+ for (int n = 0; n < t; n++)
+ bf.insert(n);
+
+ unsigned est = bf.approx_unique_element_count();
+ if (div > 1)
+ bf.compress(1.0 / div);
+
+ for (int n = 0; n < t; n++)
+ ASSERT_TRUE(bf.contains(n));
+
+ int test = max * 100;
+ int hit = 0;
+ for (int n = 0; n < test; n++)
+ if (bf.contains(100000 + n))
+ hit++;
+
+ double actual = (double)hit / (double)test;
+
+ bufferlist bl;
+ ::encode(bf, bl);
+
+ double byte_per_insert = (double)bl.length() / (double)max;
+ unsigned est_after = bf.approx_unique_element_count();
+ std::cout << max
+ << "\t" << t
+ << "\t" << est
+ << "\t" << est_after
+ << "\t" << fpp
+ << "\t" << actual
+ << "\t" << bl.length() << "\t" << byte_per_insert
+ << std::endl;
+
+ ASSERT_TRUE(actual < fpp * 2.0);
+ ASSERT_TRUE(actual > fpp / 2.0);
+ ASSERT_TRUE(est_after < est * 2);
+ ASSERT_TRUE(est_after > est / 2);
+ }
+}
+
+
+
TEST(BloomFilter, BinSweep) {
+ std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ std::cout.precision(5);
int total_max = 16384;
float total_fpp = .01;
std::cout << "total_inserts " << total_max << " target-fpp " << total_fpp << std::endl;