Added bloom filter support.v1.4

In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update.
author: Sanjay Ghemawat <sanjay@google.com> 2012-04-17 08:36:46 -0700
committer: Sanjay Ghemawat <sanjay@google.com> 2012-04-17 08:36:46 -0700
commit: 85584d497e7b354853b72f450683d59fcf6b9c5c (patch)
tree: b6aad4f973f87487c3caa5600e7596219c79f645 /table/filter_block.cc
parent: bc1ee4d25e09b04e074db330a41f54ef4af0e31b (diff)
download: leveldb-85584d497e7b354853b72f450683d59fcf6b9c5c.tar.gz
1 files changed, 111 insertions, 0 deletions
diff --git a/table/filter_block.cc b/table/filter_block.cc
new file mode 100644
index 0000000..203e15c
--- /dev/null
+++ b/table/filter_block.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/filter_block.h"
+
+#include "leveldb/filter_policy.h"
+#include "util/coding.h"
+
+namespace leveldb {
+
+// See doc/table_format.txt for an explanation of the filter block format.
+
+// Generate new filter every 2KB of data
+static const size_t kFilterBaseLg = 11;
+static const size_t kFilterBase = 1 << kFilterBaseLg;
+
+FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy)
+    : policy_(policy) {
+}
+
+void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
+  uint64_t filter_index = (block_offset / kFilterBase);
+  assert(filter_index >= filter_offsets_.size());
+  while (filter_index > filter_offsets_.size()) {
+    GenerateFilter();
+  }
+}
+
+void FilterBlockBuilder::AddKey(const Slice& key) {
+  Slice k = key;
+  start_.push_back(keys_.size());
+  keys_.append(k.data(), k.size());
+}
+
+Slice FilterBlockBuilder::Finish() {
+  if (!start_.empty()) {
+    GenerateFilter();
+  }
+
+  // Append array of per-filter offsets
+  const uint32_t array_offset = result_.size();
+  for (size_t i = 0; i < filter_offsets_.size(); i++) {
+    PutFixed32(&result_, filter_offsets_[i]);
+  }
+
+  PutFixed32(&result_, array_offset);
+  result_.push_back(kFilterBaseLg);  // Save encoding parameter in result
+  return Slice(result_);
+}
+
+void FilterBlockBuilder::GenerateFilter() {
+  const size_t num_keys = start_.size();
+  if (num_keys == 0) {
+    // Fast path if there are no keys for this filter
+    filter_offsets_.push_back(result_.size());
+    return;
+  }
+
+  // Make list of keys from flattened key structure
+  start_.push_back(keys_.size());  // Simplify length computation
+  tmp_keys_.resize(num_keys);
+  for (size_t i = 0; i < num_keys; i++) {
+    const char* base = keys_.data() + start_[i];
+    size_t length = start_[i+1] - start_[i];
+    tmp_keys_[i] = Slice(base, length);
+  }
+
+  // Generate filter for current set of keys and append to result_.
+  filter_offsets_.push_back(result_.size());
+  policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_);
+
+  tmp_keys_.clear();
+  keys_.clear();
+  start_.clear();
+}
+
+FilterBlockReader::FilterBlockReader(const FilterPolicy* policy,
+                                     const Slice& contents)
+    : policy_(policy),
+      data_(NULL),
+      offset_(NULL),
+      num_(0),
+      base_lg_(0) {
+  size_t n = contents.size();
+  if (n < 5) return;  // 1 byte for base_lg_ and 4 for start of offset array
+  base_lg_ = contents[n-1];
+  uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
+  if (last_word > n - 5) return;
+  data_ = contents.data();
+  offset_ = data_ + last_word;
+  num_ = (n - 5 - last_word) / 4;
+}
+
+bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
+  uint64_t index = block_offset >> base_lg_;
+  if (index < num_) {
+    uint32_t start = DecodeFixed32(offset_ + index*4);
+    uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
+    if (start <= limit && limit <= (offset_ - data_)) {
+      Slice filter = Slice(data_ + start, limit - start);
+      return policy_->KeyMayMatch(key, filter);
+    } else if (start == limit) {
+      // Empty filters do not match any keys
+      return false;
+    }
+  }
+  return true;  // Errors are treated as potential matches
+}
+
+}
author	Sanjay Ghemawat <sanjay@google.com>	2012-04-17 08:36:46 -0700
committer	Sanjay Ghemawat <sanjay@google.com>	2012-04-17 08:36:46 -0700
commit	85584d497e7b354853b72f450683d59fcf6b9c5c (patch)
tree	b6aad4f973f87487c3caa5600e7596219c79f645 /table/filter_block.cc
parent	bc1ee4d25e09b04e074db330a41f54ef4af0e31b (diff)
download	leveldb-85584d497e7b354853b72f450683d59fcf6b9c5c.tar.gz