summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-09-19 17:57:14 -0700
committerSage Weil <sage@inktank.com>2013-10-02 14:09:12 -0700
commitf1584fb05c57e47dcee218982e27a74ee4d8a227 (patch)
treea0f06ee489a99573746afd4c90684a69a331afa4
parent12aa53cc940766b5ef1aabbbd1a252659d9654ef (diff)
downloadceph-f1584fb05c57e47dcee218982e27a74ee4d8a227.tar.gz
common/bloom_filter: unit tests
Fun facts: - fpp = false positive probability - fpp is a function of insert count only - at .1% fpp, we pay about 2 bytes per insert - at 1-2% fpp, we pay about 1 byte per insert - at 15% fpp, we pay about .5 bytes per insert Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/common/bloom_filter.hpp3
-rw-r--r--src/test/Makefile.am5
-rw-r--r--src/test/common/test_bloom_filter.cc62
3 files changed, 69 insertions, 1 deletions
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp
index 2a1ee2c4217..cc22136e5ca 100644
--- a/src/common/bloom_filter.hpp
+++ b/src/common/bloom_filter.hpp
@@ -26,6 +26,7 @@
#include <algorithm>
#include <cmath>
#include <limits>
+#include <list>
#include <string>
#include <vector>
@@ -470,7 +471,7 @@ public:
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
void dump(Formatter *f) const;
- static void generate_test_instances(list<bloom_filter*>& ls);
+ static void generate_test_instances(std::list<bloom_filter*>& ls);
};
WRITE_CLASS_ENCODER(bloom_filter)
diff --git a/src/test/Makefile.am b/src/test/Makefile.am
index 88cf1ce970f..d2d76f4008f 100644
--- a/src/test/Makefile.am
+++ b/src/test/Makefile.am
@@ -258,6 +258,11 @@ unittest_addrs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_addrs_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
check_PROGRAMS += unittest_addrs
+unittest_bloom_filter_SOURCES = test/common/test_bloom_filter.cc
+unittest_bloom_filter_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_bloom_filter_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+check_PROGRAMS += unittest_bloom_filter
+
unittest_sharedptr_registry_SOURCES = test/common/test_sharedptr_registry.cc
unittest_sharedptr_registry_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_sharedptr_registry_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
diff --git a/src/test/common/test_bloom_filter.cc b/src/test/common/test_bloom_filter.cc
new file mode 100644
index 00000000000..8be52511362
--- /dev/null
+++ b/src/test/common/test_bloom_filter.cc
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank <info@inktank.com>
+ *
+ * LGPL2.1 (see COPYING-LGPL2.1) or later
+ */
+
+#include <iostream>
+#include <gtest/gtest.h>
+
+#include "include/stringify.h"
+#include "common/bloom_filter.hpp"
+
+TEST(BloomFilter, Basic) {
+ bloom_filter bf(10, .1, 1);
+ bf.insert("foo");
+ bf.insert("bar");
+
+ ASSERT_TRUE(bf.contains("foo"));
+ ASSERT_TRUE(bf.contains("bar"));
+}
+
+TEST(BloomFilter, Sweep) {
+ std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl;
+ for (int ex = 3; ex < 12; ex++) {
+ for (float fpp = .001; fpp < .5; fpp *= 2.0) {
+ int max = 2 << ex;
+ bloom_filter bf(max, fpp, 1);
+ bf.insert("foo");
+ bf.insert("bar");
+
+ ASSERT_TRUE(bf.contains("foo"));
+ ASSERT_TRUE(bf.contains("bar"));
+
+ for (int n = 0; n < max; n++)
+ bf.insert("ok" + stringify(n));
+
+ int test = max * 100;
+ int hit = 0;
+ for (int n = 0; n < test; n++)
+ if (bf.contains("asdf" + stringify(n)))
+ hit++;
+
+ ASSERT_TRUE(bf.contains("foo"));
+ ASSERT_TRUE(bf.contains("bar"));
+
+ double actual = (double)hit / (double)test;
+
+ bufferlist bl;
+ ::encode(bf, bl);
+
+ double byte_per_insert = (double)bl.length() / (double)max;
+
+ std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl;
+ ASSERT_TRUE(actual < fpp * 10);
+
+ }
+ }
+}