summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-10-04 22:50:30 -0700
committerSage Weil <sage@inktank.com>2013-10-04 22:57:35 -0700
commit04091f87dd3c56c63cb940839db8b7699aeb1780 (patch)
tree408db28df256cecaf6ae47ddbc6b3422c8c2f13e
parentd07f5f358841546391c4df54ae83742533aadfd8 (diff)
downloadceph-04091f87dd3c56c63cb940839db8b7699aeb1780.tar.gz
common/bloom_filter: remember original target size
This isn't strictly needed for core functionality, but it is convenient to know. Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/common/bloom_filter.cc32
-rw-r--r--src/common/bloom_filter.hpp36
2 files changed, 42 insertions, 26 deletions
diff --git a/src/common/bloom_filter.cc b/src/common/bloom_filter.cc
index a7a4c1d0796..41cad174909 100644
--- a/src/common/bloom_filter.cc
+++ b/src/common/bloom_filter.cc
@@ -8,7 +8,8 @@ void bloom_filter::encode(bufferlist& bl) const
{
ENCODE_START(2, 2, bl);
::encode((uint64_t)salt_count_, bl);
- ::encode((uint64_t)inserted_element_count_, bl);
+ ::encode((uint64_t)insert_count_, bl);
+ ::encode((uint64_t)target_element_count_, bl);
::encode((uint64_t)random_seed_, bl);
bufferptr bp((const char*)bit_table_, table_size_);
::encode(bp, bl);
@@ -22,7 +23,9 @@ void bloom_filter::decode(bufferlist::iterator& p)
::decode(v, p);
salt_count_ = v;
::decode(v, p);
- inserted_element_count_ = v;
+ insert_count_ = v;
+ ::decode(v, p);
+ target_element_count_ = v;
::decode(v, p);
random_seed_ = v;
bufferlist t;
@@ -46,7 +49,8 @@ void bloom_filter::dump(Formatter *f) const
{
f->dump_unsigned("salt_count", salt_count_);
f->dump_unsigned("table_size", table_size_);
- f->dump_unsigned("insert_count", inserted_element_count_);
+ f->dump_unsigned("insert_count", insert_count_);
+ f->dump_unsigned("target_element_count", target_element_count_);
f->dump_unsigned("random_seed", random_seed_);
f->open_array_section("salt_table");
@@ -62,11 +66,11 @@ void bloom_filter::dump(Formatter *f) const
void bloom_filter::generate_test_instances(list<bloom_filter*>& ls)
{
- ls.push_back(new bloom_filter(10, .5, 1));
- ls.push_back(new bloom_filter(10, .5, 1));
+ ls.push_back(new bloom_filter(10, .5, 1, 5));
+ ls.push_back(new bloom_filter(10, .5, 1, 5));
ls.back()->insert("foo");
ls.back()->insert("bar");
- ls.push_back(new bloom_filter(50, .5, 1));
+ ls.push_back(new bloom_filter(50, .5, 1, 5));
ls.back()->insert("foo");
ls.back()->insert("bar");
ls.back()->insert("baz");
@@ -84,7 +88,8 @@ void compressible_bloom_filter::encode(bufferlist& bl) const
for (vector<size_t>::const_iterator p = size_list.begin();
p != size_list.end(); ++p)
::encode((uint64_t)*p, bl);
- ::encode((uint64_t)inserted_element_count_, bl);
+ ::encode((uint64_t)insert_count_, bl);
+ ::encode((uint64_t)target_element_count_, bl);
::encode((uint64_t)random_seed_, bl);
bufferptr bp((const char*)bit_table_, table_size_);
::encode(bp, bl);
@@ -107,7 +112,9 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p)
}
::decode(v, p);
- inserted_element_count_ = v;
+ insert_count_ = v;
+ ::decode(v, p);
+ target_element_count_ = v;
::decode(v, p);
random_seed_ = v;
bufferlist t;
@@ -132,7 +139,8 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p)
void compressible_bloom_filter::dump(Formatter *f) const
{
f->dump_unsigned("salt_count", salt_count_);
- f->dump_unsigned("insert_count", inserted_element_count_);
+ f->dump_unsigned("insert_count", insert_count_);
+ f->dump_unsigned("target_element_count", target_element_count_);
f->dump_unsigned("random_seed", random_seed_);
f->dump_unsigned("table_size", table_size_);
f->open_array_section("table_sizes");
@@ -154,11 +162,11 @@ void compressible_bloom_filter::dump(Formatter *f) const
void compressible_bloom_filter::generate_test_instances(list<compressible_bloom_filter*>& ls)
{
- ls.push_back(new compressible_bloom_filter(10, .5, 1));
- ls.push_back(new compressible_bloom_filter(10, .5, 1));
+ ls.push_back(new compressible_bloom_filter(10, .5, 1, 4));
+ ls.push_back(new compressible_bloom_filter(10, .5, 1, 4));
ls.back()->insert("foo");
ls.back()->insert("bar");
- ls.push_back(new compressible_bloom_filter(50, .5, 1));
+ ls.push_back(new compressible_bloom_filter(50, .5, 1, 4));
ls.back()->insert("foo");
ls.back()->insert("bar");
ls.back()->insert("baz");
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp
index 50349560feb..c336eef8b6a 100644
--- a/src/common/bloom_filter.hpp
+++ b/src/common/bloom_filter.hpp
@@ -57,7 +57,8 @@ protected:
std::vector<bloom_type> salt_; ///< vector of salts
std::size_t salt_count_; ///< number of salts
std::size_t table_size_; ///< bit table size in bytes
- std::size_t inserted_element_count_; ///< insertion count
+ std::size_t insert_count_; ///< insertion count
+ std::size_t target_element_count_; ///< target number of unique insertions
std::size_t random_seed_; ///< random seed
public:
@@ -66,7 +67,8 @@ public:
: bit_table_(0),
salt_count_(0),
table_size_(0),
- inserted_element_count_(0),
+ insert_count_(0),
+ target_element_count_(0),
random_seed_(0)
{}
@@ -74,7 +76,8 @@ public:
const double& false_positive_probability,
const std::size_t& random_seed)
: bit_table_(0),
- inserted_element_count_(0),
+ insert_count_(0),
+ target_element_count_(predicted_inserted_element_count),
random_seed_((random_seed) ? random_seed : 0xA5A5A5A5)
{
find_optimal_parameters(predicted_inserted_element_count, false_positive_probability,
@@ -84,11 +87,13 @@ public:
bloom_filter(const std::size_t& salt_count,
std::size_t table_size,
- const std::size_t& random_seed)
+ const std::size_t& random_seed,
+ std::size_t target_element_count)
: bit_table_(0),
salt_count_(salt_count),
table_size_(table_size),
- inserted_element_count_(0),
+ insert_count_(0),
+ target_element_count_(target_element_count),
random_seed_((random_seed) ? random_seed : 0xA5A5A5A5)
{
init();
@@ -110,7 +115,7 @@ public:
if (this != &filter) {
salt_count_ = filter.salt_count_;
table_size_ = filter.table_size_;
- inserted_element_count_ = filter.inserted_element_count_;
+ insert_count_ = filter.insert_count_;
random_seed_ = filter.random_seed_;
delete[] bit_table_;
bit_table_ = new cell_type[table_size_];
@@ -133,7 +138,7 @@ public:
inline void clear()
{
std::fill_n(bit_table_, table_size_, 0x00);
- inserted_element_count_ = 0;
+ insert_count_ = 0;
}
/**
@@ -153,7 +158,7 @@ public:
compute_indices(hash_ap(val,salt_[i]),bit_index,bit);
bit_table_[bit_index >> 3] |= bit_mask[bit];
}
- ++inserted_element_count_;
+ ++insert_count_;
}
inline void insert(const unsigned char* key_begin, const std::size_t& length)
@@ -165,7 +170,7 @@ public:
compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit);
bit_table_[bit_index >> 3] |= bit_mask[bit];
}
- ++inserted_element_count_;
+ ++insert_count_;
}
template<typename T>
@@ -288,7 +293,9 @@ public:
inline std::size_t element_count() const
{
- return inserted_element_count_;
+ return insert_count_;
+ }
+
}
inline double effective_fpp() const
@@ -300,7 +307,7 @@ public:
the current number of inserted elements - not the user defined
predicated/expected number of inserted elements.
*/
- return std::pow(1.0 - std::exp(-1.0 * salt_.size() * inserted_element_count_ / size()), 1.0 * salt_.size());
+ return std::pow(1.0 - std::exp(-1.0 * salt_.size() * insert_count_ / size()), 1.0 * salt_.size());
}
inline bloom_filter& operator &= (const bloom_filter& filter)
@@ -552,15 +559,16 @@ public:
compressible_bloom_filter(const std::size_t& predicted_element_count,
const double& false_positive_probability,
const std::size_t& random_seed)
- : bloom_filter(predicted_element_count,false_positive_probability,random_seed)
+ : bloom_filter(predicted_element_count, false_positive_probability, random_seed)
{
size_list.push_back(table_size_);
}
compressible_bloom_filter(const std::size_t& salt_count,
std::size_t table_size,
- const std::size_t& random_seed)
- : bloom_filter(salt_count, table_size, random_seed)
+ const std::size_t& random_seed,
+ std::size_t target_count)
+ : bloom_filter(salt_count, table_size, random_seed, target_count)
{
size_list.push_back(table_size_);
}