author      Sanjay Ghemawat <sanjay@google.com>    2012-04-17 08:36:46 -0700
committer   Sanjay Ghemawat <sanjay@google.com>    2012-04-17 08:36:46 -0700
commit      85584d497e7b354853b72f450683d59fcf6b9c5c (patch)
tree        b6aad4f973f87487c3caa5600e7596219c79f645 /table
parent      bc1ee4d25e09b04e074db330a41f54ef4af0e31b (diff)
download    leveldb-85584d497e7b354853b72f450683d59fcf6b9c5c.tar.gz
Added bloom filter support. (tag: v1.4)
In particular, we add a new FilterPolicy class. An instance of this class
can be supplied in Options when opening a database. If supplied, the
instance is used to generate summaries of keys (e.g., a bloom filter)
which are placed in sstables. These summaries are consulted by DB::Get()
so we can avoid reading sstable blocks that are guaranteed not to contain
the key we are looking for.

This change provides one implementation of FilterPolicy based on bloom
filters.

Other changes:
- Updated version number to 1.4.
- Some build tweaks.
- C binding for CompactRange.
- A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom.
- Minor .gitignore update.
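As a usage sketch (not itself part of the table/ diff below; it relies on the new
include/leveldb/filter_policy.h and the Options::filter_policy field added elsewhere
in this change, and the database path is illustrative):

    #include <cassert>
    #include <string>
    #include "leveldb/db.h"
    #include "leveldb/filter_policy.h"

    int main() {
      leveldb::Options options;
      options.create_if_missing = true;
      // Roughly 10 bits per key; consulted by DB::Get() so it can skip
      // data blocks that cannot contain the key.
      options.filter_policy = leveldb::NewBloomFilterPolicy(10);

      leveldb::DB* db;
      leveldb::Status s = leveldb::DB::Open(options, "/tmp/filterdb", &db);
      assert(s.ok());

      std::string value;
      s = db->Get(leveldb::ReadOptions(), "some-key", &value);  // filter consulted here

      delete db;
      delete options.filter_policy;  // LevelDB does not take ownership of the policy
      return 0;
    }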
Diffstat (limited to 'table')
-rw-r--r--  table/block.cc                  9
-rw-r--r--  table/block.h                   5
-rw-r--r--  table/filter_block.cc       111
-rw-r--r--  table/filter_block.h         68
-rw-r--r--  table/filter_block_test.cc  128
-rw-r--r--  table/format.cc              23
-rw-r--r--  table/format.h               16
-rw-r--r--  table/table.cc              116
-rw-r--r--  table/table_builder.cc       55
-rw-r--r--  table/table_test.cc          32

10 files changed, 500 insertions(+), 63 deletions(-)
diff --git a/table/block.cc b/table/block.cc
index 06eb6f8..199d453 100644
--- a/table/block.cc
+++ b/table/block.cc
@@ -9,6 +9,7 @@
#include <vector>
#include <algorithm>
#include "leveldb/comparator.h"
+#include "table/format.h"
#include "util/coding.h"
#include "util/logging.h"
@@ -19,10 +20,10 @@ inline uint32_t Block::NumRestarts() const {
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
}
-Block::Block(const char* data, size_t size, bool take_ownership)
- : data_(data),
- size_(size),
- owned_(take_ownership) {
+Block::Block(const BlockContents& contents)
+ : data_(contents.data.data()),
+ size_(contents.data.size()),
+ owned_(contents.heap_allocated) {
if (size_ < sizeof(uint32_t)) {
size_ = 0; // Error marker
} else {
diff --git a/table/block.h b/table/block.h
index 76088a4..2493eb9 100644
--- a/table/block.h
+++ b/table/block.h
@@ -11,14 +11,13 @@
namespace leveldb {
+struct BlockContents;
class Comparator;
class Block {
public:
// Initialize the block with the specified contents.
- // Takes ownership of data[] and will delete[] it when done iff
- // "take_ownership is true.
- Block(const char* data, size_t size, bool take_ownership);
+ explicit Block(const BlockContents& contents);
~Block();
diff --git a/table/filter_block.cc b/table/filter_block.cc
new file mode 100644
index 0000000..203e15c
--- /dev/null
+++ b/table/filter_block.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/filter_block.h"
+
+#include "leveldb/filter_policy.h"
+#include "util/coding.h"
+
+namespace leveldb {
+
+// See doc/table_format.txt for an explanation of the filter block format.
+
+// Generate new filter every 2KB of data
+static const size_t kFilterBaseLg = 11;
+static const size_t kFilterBase = 1 << kFilterBaseLg;
+
+FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy)
+ : policy_(policy) {
+}
+
+void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
+ uint64_t filter_index = (block_offset / kFilterBase);
+ assert(filter_index >= filter_offsets_.size());
+ while (filter_index > filter_offsets_.size()) {
+ GenerateFilter();
+ }
+}
+
+void FilterBlockBuilder::AddKey(const Slice& key) {
+ Slice k = key;
+ start_.push_back(keys_.size());
+ keys_.append(k.data(), k.size());
+}
+
+Slice FilterBlockBuilder::Finish() {
+ if (!start_.empty()) {
+ GenerateFilter();
+ }
+
+ // Append array of per-filter offsets
+ const uint32_t array_offset = result_.size();
+ for (size_t i = 0; i < filter_offsets_.size(); i++) {
+ PutFixed32(&result_, filter_offsets_[i]);
+ }
+
+ PutFixed32(&result_, array_offset);
+ result_.push_back(kFilterBaseLg); // Save encoding parameter in result
+ return Slice(result_);
+}
+
+void FilterBlockBuilder::GenerateFilter() {
+ const size_t num_keys = start_.size();
+ if (num_keys == 0) {
+ // Fast path if there are no keys for this filter
+ filter_offsets_.push_back(result_.size());
+ return;
+ }
+
+ // Make list of keys from flattened key structure
+ start_.push_back(keys_.size()); // Simplify length computation
+ tmp_keys_.resize(num_keys);
+ for (size_t i = 0; i < num_keys; i++) {
+ const char* base = keys_.data() + start_[i];
+ size_t length = start_[i+1] - start_[i];
+ tmp_keys_[i] = Slice(base, length);
+ }
+
+ // Generate filter for current set of keys and append to result_.
+ filter_offsets_.push_back(result_.size());
+ policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_);
+
+ tmp_keys_.clear();
+ keys_.clear();
+ start_.clear();
+}
+
+FilterBlockReader::FilterBlockReader(const FilterPolicy* policy,
+ const Slice& contents)
+ : policy_(policy),
+ data_(NULL),
+ offset_(NULL),
+ num_(0),
+ base_lg_(0) {
+ size_t n = contents.size();
+ if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array
+ base_lg_ = contents[n-1];
+ uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
+ if (last_word > n - 5) return;
+ data_ = contents.data();
+ offset_ = data_ + last_word;
+ num_ = (n - 5 - last_word) / 4;
+}
+
+bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
+ uint64_t index = block_offset >> base_lg_;
+ if (index < num_) {
+ uint32_t start = DecodeFixed32(offset_ + index*4);
+ uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
+ if (start <= limit && limit <= (offset_ - data_)) {
+ Slice filter = Slice(data_ + start, limit - start);
+ return policy_->KeyMayMatch(key, filter);
+ } else if (start == limit) {
+ // Empty filters do not match any keys
+ return false;
+ }
+ }
+ return true; // Errors are treated as potential matches
+}
+
+}
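Taken together, FilterBlockBuilder::Finish() and FilterBlockReader decode the
following byte layout (a sketch reconstructed from the code above;
doc/table_format.txt, updated elsewhere in this change, is the authoritative
description):

    [filter 0]
    [filter 1]
    ...
    [filter N-1]

    [offset of filter 0]               : 4 bytes, fixed32
    [offset of filter 1]               : 4 bytes, fixed32
    ...
    [offset of filter N-1]             : 4 bytes, fixed32

    [offset of start of offset array]  : 4 bytes, fixed32
    lg(base), i.e. kFilterBaseLg       : 1 byte

Filter i holds the keys of all data blocks whose file offset falls in
[i*2048, (i+1)*2048), which is why KeyMayMatch() can locate the right filter
with block_offset >> base_lg_.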
diff --git a/table/filter_block.h b/table/filter_block.h
new file mode 100644
index 0000000..c67d010
--- /dev/null
+++ b/table/filter_block.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A filter block is stored near the end of a Table file. It contains
+// filters (e.g., bloom filters) for all data blocks in the table combined
+// into a single filter block.
+
+#ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
+#define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <vector>
+#include "leveldb/slice.h"
+#include "util/hash.h"
+
+namespace leveldb {
+
+class FilterPolicy;
+
+// A FilterBlockBuilder is used to construct all of the filters for a
+// particular Table. It generates a single string which is stored as
+// a special block in the Table.
+//
+// The sequence of calls to FilterBlockBuilder must match the regexp:
+// (StartBlock AddKey*)* Finish
+class FilterBlockBuilder {
+ public:
+ explicit FilterBlockBuilder(const FilterPolicy*);
+
+ void StartBlock(uint64_t block_offset);
+ void AddKey(const Slice& key);
+ Slice Finish();
+
+ private:
+ void GenerateFilter();
+
+ const FilterPolicy* policy_;
+ std::string keys_; // Flattened key contents
+ std::vector<size_t> start_; // Starting index in keys_ of each key
+ std::string result_; // Filter data computed so far
+ std::vector<Slice> tmp_keys_; // policy_->CreateFilter() argument
+ std::vector<uint32_t> filter_offsets_;
+
+ // No copying allowed
+ FilterBlockBuilder(const FilterBlockBuilder&);
+ void operator=(const FilterBlockBuilder&);
+};
+
+class FilterBlockReader {
+ public:
+ // REQUIRES: "contents" and *policy must stay live while *this is live.
+ FilterBlockReader(const FilterPolicy* policy, const Slice& contents);
+ bool KeyMayMatch(uint64_t block_offset, const Slice& key);
+
+ private:
+ const FilterPolicy* policy_;
+ const char* data_; // Pointer to filter data (at block-start)
+ const char* offset_; // Pointer to beginning of offset array (at block-end)
+ size_t num_; // Number of entries in offset array
+ size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
+};
+
+}
+
+#endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
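As a usage note, the call pattern that satisfies the (StartBlock AddKey*)* Finish
contract is exactly what TableBuilder (further down in this diff) produces. A minimal
sketch, with illustrative key names and an illustrative flush offset:

    FilterBlockBuilder fb(options.filter_policy);  // policy assumed non-NULL
    fb.StartBlock(0);                // before the first data block
    fb.AddKey("k1");                 // one AddKey() per key added to the table
    fb.AddKey("k2");
    fb.StartBlock(4096);             // after a data block is flushed at offset 4096
    fb.AddKey("k3");
    leveldb::Slice contents = fb.Finish();  // entire filter block, written uncompressed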
diff --git a/table/filter_block_test.cc b/table/filter_block_test.cc
new file mode 100644
index 0000000..3a2a07c
--- /dev/null
+++ b/table/filter_block_test.cc
@@ -0,0 +1,128 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/filter_block.h"
+
+#include "leveldb/filter_policy.h"
+#include "util/coding.h"
+#include "util/hash.h"
+#include "util/logging.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace leveldb {
+
+// For testing: emit an array with one hash value per key
+class TestHashFilter : public FilterPolicy {
+ public:
+ virtual const char* Name() const {
+ return "TestHashFilter";
+ }
+
+ virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
+ for (int i = 0; i < n; i++) {
+ uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
+ PutFixed32(dst, h);
+ }
+ }
+
+ virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
+ uint32_t h = Hash(key.data(), key.size(), 1);
+ for (int i = 0; i + 4 <= filter.size(); i += 4) {
+ if (h == DecodeFixed32(filter.data() + i)) {
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+class FilterBlockTest {
+ public:
+ TestHashFilter policy_;
+};
+
+TEST(FilterBlockTest, EmptyBuilder) {
+ FilterBlockBuilder builder(&policy_);
+ Slice block = builder.Finish();
+ ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));
+ FilterBlockReader reader(&policy_, block);
+ ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
+ ASSERT_TRUE(reader.KeyMayMatch(100000, "foo"));
+}
+
+TEST(FilterBlockTest, SingleChunk) {
+ FilterBlockBuilder builder(&policy_);
+ builder.StartBlock(100);
+ builder.AddKey("foo");
+ builder.AddKey("bar");
+ builder.AddKey("box");
+ builder.StartBlock(200);
+ builder.AddKey("box");
+ builder.StartBlock(300);
+ builder.AddKey("hello");
+ Slice block = builder.Finish();
+ FilterBlockReader reader(&policy_, block);
+ ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
+ ASSERT_TRUE(reader.KeyMayMatch(100, "bar"));
+ ASSERT_TRUE(reader.KeyMayMatch(100, "box"));
+ ASSERT_TRUE(reader.KeyMayMatch(100, "hello"));
+ ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
+ ASSERT_TRUE(! reader.KeyMayMatch(100, "missing"));
+ ASSERT_TRUE(! reader.KeyMayMatch(100, "other"));
+}
+
+TEST(FilterBlockTest, MultiChunk) {
+ FilterBlockBuilder builder(&policy_);
+
+ // First filter
+ builder.StartBlock(0);
+ builder.AddKey("foo");
+ builder.StartBlock(2000);
+ builder.AddKey("bar");
+
+ // Second filter
+ builder.StartBlock(3100);
+ builder.AddKey("box");
+
+ // Third filter is empty
+
+ // Last filter
+ builder.StartBlock(9000);
+ builder.AddKey("box");
+ builder.AddKey("hello");
+
+ Slice block = builder.Finish();
+ FilterBlockReader reader(&policy_, block);
+
+ // Check first filter
+ ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
+ ASSERT_TRUE(reader.KeyMayMatch(2000, "bar"));
+ ASSERT_TRUE(! reader.KeyMayMatch(0, "box"));
+ ASSERT_TRUE(! reader.KeyMayMatch(0, "hello"));
+
+ // Check second filter
+ ASSERT_TRUE(reader.KeyMayMatch(3100, "box"));
+ ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo"));
+ ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar"));
+ ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello"));
+
+ // Check third filter (empty)
+ ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo"));
+ ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar"));
+ ASSERT_TRUE(! reader.KeyMayMatch(4100, "box"));
+ ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello"));
+
+ // Check last filter
+ ASSERT_TRUE(reader.KeyMayMatch(9000, "box"));
+ ASSERT_TRUE(reader.KeyMayMatch(9000, "hello"));
+ ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo"));
+ ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar"));
+}
+
+} // namespace leveldb
+
+int main(int argc, char** argv) {
+ return leveldb::test::RunAllTests();
+}
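A quick worked check of the MultiChunk offsets against the 2KB filter base
(kFilterBaseLg == 11):

    0    >> 11 == 0   and   2000 >> 11 == 0   ->  "foo" and "bar" go into filter 0
    3100 >> 11 == 1                           ->  "box" goes into filter 1
    9000 >> 11 == 4                           ->  StartBlock(9000) emits empty filters 2 and 3
    4100 >> 11 == 2                           ->  the offset-4100 queries hit empty filter 2

so the "third filter" assertions exercise the start == limit (empty filter) branch
of FilterBlockReader::KeyMayMatch().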
diff --git a/table/format.cc b/table/format.cc
index 25b85a2..cda1dec 100644
--- a/table/format.cc
+++ b/table/format.cc
@@ -66,10 +66,10 @@ Status Footer::DecodeFrom(Slice* input) {
Status ReadBlock(RandomAccessFile* file,
const ReadOptions& options,
const BlockHandle& handle,
- Block** block,
- bool* may_cache) {
- *block = NULL;
- *may_cache = false;
+ BlockContents* result) {
+ result->data = Slice();
+ result->cachable = false;
+ result->heap_allocated = false;
// Read the block contents as well as the type/crc footer.
// See table_builder.cc for the code that built this structure.
@@ -105,11 +105,13 @@ Status ReadBlock(RandomAccessFile* file,
// Use it directly under the assumption that it will be live
// while the file is open.
delete[] buf;
- *block = new Block(data, n, false /* do not take ownership */);
- *may_cache = false; // Do not double-cache
+ result->data = Slice(data, n);
+ result->heap_allocated = false;
+ result->cachable = false; // Do not double-cache
} else {
- *block = new Block(buf, n, true /* take ownership */);
- *may_cache = true;
+ result->data = Slice(buf, n);
+ result->heap_allocated = true;
+ result->cachable = true;
}
// Ok
@@ -127,8 +129,9 @@ Status ReadBlock(RandomAccessFile* file,
return Status::Corruption("corrupted compressed block contents");
}
delete[] buf;
- *block = new Block(ubuf, ulength, true /* take ownership */);
- *may_cache = true;
+ result->data = Slice(ubuf, ulength);
+ result->heap_allocated = true;
+ result->cachable = true;
break;
}
default:
diff --git a/table/format.h b/table/format.h
index 66a15da..6c0b80c 100644
--- a/table/format.h
+++ b/table/format.h
@@ -83,16 +83,18 @@ static const uint64_t kTableMagicNumber = 0xdb4775248b80fb57ull;
// 1-byte type + 32-bit crc
static const size_t kBlockTrailerSize = 5;
-// Read the block identified by "handle" from "file". On success,
-// store a pointer to the heap-allocated result in *block and return
-// OK. On failure store NULL in *block and return non-OK.
-// On success, stores true in *may_cache if the result may be
-// cached, false if it must not be cached.
+struct BlockContents {
+ Slice data; // Actual contents of data
+ bool cachable; // True iff data can be cached
+ bool heap_allocated; // True iff caller should delete[] data.data()
+};
+
+// Read the block identified by "handle" from "file". On failure
+// return non-OK. On success fill *result and return OK.
extern Status ReadBlock(RandomAccessFile* file,
const ReadOptions& options,
const BlockHandle& handle,
- Block** block,
- bool* may_cache);
+ BlockContents* result);
// Implementation details follow. Clients should ignore,
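The new calling convention, sketched below as it appears from inside namespace
leveldb with "file" and "handle" assumed in scope (this mirrors Table::Open() and
Table::BlockReader() in the next file):

    BlockContents contents;
    Status s = ReadBlock(file, ReadOptions(), handle, &contents);
    if (s.ok()) {
      Block* block = new Block(contents);   // Block frees the buffer iff heap_allocated
      bool may_cache = contents.cachable;   // false when the file returned its own
                                            // buffer (e.g., mmap), to avoid double-caching
    }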
diff --git a/table/table.cc b/table/table.cc
index 07dcffd..dbd6d3a 100644
--- a/table/table.cc
+++ b/table/table.cc
@@ -5,8 +5,12 @@
#include "leveldb/table.h"
#include "leveldb/cache.h"
+#include "leveldb/comparator.h"
#include "leveldb/env.h"
+#include "leveldb/filter_policy.h"
+#include "leveldb/options.h"
#include "table/block.h"
+#include "table/filter_block.h"
#include "table/format.h"
#include "table/two_level_iterator.h"
#include "util/coding.h"
@@ -15,6 +19,8 @@ namespace leveldb {
struct Table::Rep {
~Rep() {
+ delete filter;
+ delete [] filter_data;
delete index_block;
}
@@ -22,6 +28,8 @@ struct Table::Rep {
Status status;
RandomAccessFile* file;
uint64_t cache_id;
+ FilterBlockReader* filter;
+ const char* filter_data;
BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
Block* index_block;
@@ -47,11 +55,13 @@ Status Table::Open(const Options& options,
if (!s.ok()) return s;
// Read the index block
+ BlockContents contents;
Block* index_block = NULL;
if (s.ok()) {
- bool may_cache; // Ignored result
- s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block,
- &may_cache);
+ s = ReadBlock(file, ReadOptions(), footer.index_handle(), &contents);
+ if (s.ok()) {
+ index_block = new Block(contents);
+ }
}
if (s.ok()) {
@@ -63,7 +73,10 @@ Status Table::Open(const Options& options,
rep->metaindex_handle = footer.metaindex_handle();
rep->index_block = index_block;
rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
+ rep->filter_data = NULL;
+ rep->filter = NULL;
*table = new Table(rep);
+ (*table)->ReadMeta(footer);
} else {
if (index_block) delete index_block;
}
@@ -71,6 +84,52 @@ Status Table::Open(const Options& options,
return s;
}
+void Table::ReadMeta(const Footer& footer) {
+ if (rep_->options.filter_policy == NULL) {
+ return; // Do not need any metadata
+ }
+
+ // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
+ // it is an empty block.
+ ReadOptions opt;
+ BlockContents contents;
+ if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
+ // Do not propagate errors since meta info is not needed for operation
+ return;
+ }
+ Block* meta = new Block(contents);
+
+ Iterator* iter = meta->NewIterator(BytewiseComparator());
+ std::string key = "filter.";
+ key.append(rep_->options.filter_policy->Name());
+ iter->Seek(key);
+ if (iter->Valid() && iter->key() == Slice(key)) {
+ ReadFilter(iter->value());
+ }
+ delete iter;
+ delete meta;
+}
+
+void Table::ReadFilter(const Slice& filter_handle_value) {
+ Slice v = filter_handle_value;
+ BlockHandle filter_handle;
+ if (!filter_handle.DecodeFrom(&v).ok()) {
+ return;
+ }
+
+ // We might want to unify with ReadBlock() if we start
+ // requiring checksum verification in Table::Open.
+ ReadOptions opt;
+ BlockContents block;
+ if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
+ return;
+ }
+ if (block.heap_allocated) {
+ rep_->filter_data = block.data.data(); // Will need to delete later
+ }
+ rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
+}
+
Table::~Table() {
delete rep_;
}
@@ -107,7 +166,7 @@ Iterator* Table::BlockReader(void* arg,
// can add more features in the future.
if (s.ok()) {
- bool may_cache;
+ BlockContents contents;
if (block_cache != NULL) {
char cache_key_buffer[16];
EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
@@ -117,14 +176,20 @@ Iterator* Table::BlockReader(void* arg,
if (cache_handle != NULL) {
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
} else {
- s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
- if (s.ok() && may_cache && options.fill_cache) {
- cache_handle = block_cache->Insert(
- key, block, block->size(), &DeleteCachedBlock);
+ s = ReadBlock(table->rep_->file, options, handle, &contents);
+ if (s.ok()) {
+ block = new Block(contents);
+ if (contents.cachable && options.fill_cache) {
+ cache_handle = block_cache->Insert(
+ key, block, block->size(), &DeleteCachedBlock);
+ }
}
}
} else {
- s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
+ s = ReadBlock(table->rep_->file, options, handle, &contents);
+ if (s.ok()) {
+ block = new Block(contents);
+ }
}
}
@@ -148,6 +213,39 @@ Iterator* Table::NewIterator(const ReadOptions& options) const {
&Table::BlockReader, const_cast<Table*>(this), options);
}
+Status Table::InternalGet(const ReadOptions& options, const Slice& k,
+ void* arg,
+ void (*saver)(void*, const Slice&, const Slice&)) {
+ Status s;
+ Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
+ iiter->Seek(k);
+ if (iiter->Valid()) {
+ Slice handle_value = iiter->value();
+ FilterBlockReader* filter = rep_->filter;
+ BlockHandle handle;
+ if (filter != NULL &&
+ handle.DecodeFrom(&handle_value).ok() &&
+ !filter->KeyMayMatch(handle.offset(), k)) {
+ // Not found
+ } else {
+ Slice handle = iiter->value();
+ Iterator* block_iter = BlockReader(this, options, iiter->value());
+ block_iter->Seek(k);
+ if (block_iter->Valid()) {
+ (*saver)(arg, block_iter->key(), block_iter->value());
+ }
+ s = block_iter->status();
+ delete block_iter;
+ }
+ }
+ if (s.ok()) {
+ s = iiter->status();
+ }
+ delete iiter;
+ return s;
+}
+
+
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
Iterator* index_iter =
rep_->index_block->NewIterator(rep_->options.comparator);
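InternalGet() reports hits through the caller-supplied saver function pointer; the
real consumer lives under db/ (outside this table/-only view), but a hypothetical
saver has roughly this shape (names and fields here are assumptions, not part of
this diff):

    struct GetState {
      std::string* value;
      bool found;
    };

    static void SaveValue(void* arg, const leveldb::Slice& k, const leveldb::Slice& v) {
      GetState* state = reinterpret_cast<GetState*>(arg);
      // A DB-level saver would also parse the internal key in "k" and check the
      // user key and value type before accepting the entry; omitted here.
      state->value->assign(v.data(), v.size());
      state->found = true;
    }

For keys the filter rules out, the saver is never invoked and no data block is read.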
diff --git a/table/table_builder.cc b/table/table_builder.cc
index 682ce5b..62002c8 100644
--- a/table/table_builder.cc
+++ b/table/table_builder.cc
@@ -5,14 +5,15 @@
#include "leveldb/table_builder.h"
#include <assert.h>
-#include <stdio.h>
#include "leveldb/comparator.h"
#include "leveldb/env.h"
+#include "leveldb/filter_policy.h"
+#include "leveldb/options.h"
#include "table/block_builder.h"
+#include "table/filter_block.h"
#include "table/format.h"
#include "util/coding.h"
#include "util/crc32c.h"
-#include "util/logging.h"
namespace leveldb {
@@ -27,6 +28,7 @@ struct TableBuilder::Rep {
std::string last_key;
int64_t num_entries;
bool closed; // Either Finish() or Abandon() has been called.
+ FilterBlockBuilder* filter_block;
// We do not emit the index entry for a block until we have seen the
// first key for the next data block. This allows us to use shorter
@@ -51,6 +53,8 @@ struct TableBuilder::Rep {
index_block(&index_block_options),
num_entries(0),
closed(false),
+ filter_block(opt.filter_policy == NULL ? NULL
+ : new FilterBlockBuilder(opt.filter_policy)),
pending_index_entry(false) {
index_block_options.block_restart_interval = 1;
}
@@ -58,10 +62,14 @@ struct TableBuilder::Rep {
TableBuilder::TableBuilder(const Options& options, WritableFile* file)
: rep_(new Rep(options, file)) {
+ if (rep_->filter_block != NULL) {
+ rep_->filter_block->StartBlock(0);
+ }
}
TableBuilder::~TableBuilder() {
assert(rep_->closed); // Catch errors where caller forgot to call Finish()
+ delete rep_->filter_block;
delete rep_;
}
@@ -98,6 +106,10 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
r->pending_index_entry = false;
}
+ if (r->filter_block != NULL) {
+ r->filter_block->AddKey(key);
+ }
+
r->last_key.assign(key.data(), key.size());
r->num_entries++;
r->data_block.Add(key, value);
@@ -119,6 +131,9 @@ void TableBuilder::Flush() {
r->pending_index_entry = true;
r->status = r->file->Flush();
}
+ if (r->filter_block != NULL) {
+ r->filter_block->StartBlock(r->offset);
+ }
}
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
@@ -152,6 +167,15 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
break;
}
}
+ WriteRawBlock(block_contents, type, handle);
+ r->compressed_output.clear();
+ block->Reset();
+}
+
+void TableBuilder::WriteRawBlock(const Slice& block_contents,
+ CompressionType type,
+ BlockHandle* handle) {
+ Rep* r = rep_;
handle->set_offset(r->offset);
handle->set_size(block_contents.size());
r->status = r->file->Append(block_contents);
@@ -166,8 +190,6 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
r->offset += block_contents.size() + kBlockTrailerSize;
}
}
- r->compressed_output.clear();
- block->Reset();
}
Status TableBuilder::status() const {
@@ -179,13 +201,32 @@ Status TableBuilder::Finish() {
Flush();
assert(!r->closed);
r->closed = true;
- BlockHandle metaindex_block_handle;
- BlockHandle index_block_handle;
+
+ BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle;
+
+ // Write filter block
+ if (ok() && r->filter_block != NULL) {
+ WriteRawBlock(r->filter_block->Finish(), kNoCompression,
+ &filter_block_handle);
+ }
+
+ // Write metaindex block
if (ok()) {
BlockBuilder meta_index_block(&r->options);
+ if (r->filter_block != NULL) {
+ // Add mapping from "filter.Name" to location of filter data
+ std::string key = "filter.";
+ key.append(r->options.filter_policy->Name());
+ std::string handle_encoding;
+ filter_block_handle.EncodeTo(&handle_encoding);
+ meta_index_block.Add(key, handle_encoding);
+ }
+
// TODO(postrelease): Add stats and other meta blocks
WriteBlock(&meta_index_block, &metaindex_block_handle);
}
+
+ // Write index block
if (ok()) {
if (r->pending_index_entry) {
r->options.comparator->FindShortSuccessor(&r->last_key);
@@ -196,6 +237,8 @@ Status TableBuilder::Finish() {
}
WriteBlock(&r->index_block, &index_block_handle);
}
+
+ // Write footer
if (ok()) {
Footer footer;
footer.set_metaindex_handle(metaindex_block_handle);
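For completeness, a sketch of driving TableBuilder directly with a filter policy
configured (file path and error handling are illustrative; keys must be added in
the order defined by options.comparator). With a filter policy set, Finish() now
emits data blocks, the filter block (kNoCompression), a metaindex block mapping
"filter.<PolicyName>" to the filter block's handle, the index block, and the footer:

    leveldb::Options opt;
    opt.filter_policy = leveldb::NewBloomFilterPolicy(10);

    leveldb::WritableFile* file = NULL;
    leveldb::Status s = leveldb::Env::Default()->NewWritableFile("/tmp/example.ldb", &file);
    if (s.ok()) {
      leveldb::TableBuilder builder(opt, file);
      builder.Add("apple", "red");        // sorted order required
      builder.Add("banana", "yellow");
      s = builder.Finish();               // writes filter + metaindex + index + footer
      file->Close();
    }
    delete file;
    delete opt.filter_policy;             // caller owns the policy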
diff --git a/table/table_test.cc b/table/table_test.cc
index 0c8e676..57cea25 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -168,8 +168,6 @@ class Constructor {
// Construct the data structure from the data in "data"
virtual Status FinishImpl(const Options& options, const KVMap& data) = 0;
- virtual size_t NumBytes() const = 0;
-
virtual Iterator* NewIterator() const = 0;
virtual const KVMap& data() { return data_; }
@@ -185,7 +183,6 @@ class BlockConstructor: public Constructor {
explicit BlockConstructor(const Comparator* cmp)
: Constructor(cmp),
comparator_(cmp),
- block_size_(-1),
block_(NULL) { }
~BlockConstructor() {
delete block_;
@@ -201,22 +198,21 @@ class BlockConstructor: public Constructor {
builder.Add(it->first, it->second);
}
// Open the block
- Slice block_data = builder.Finish();
- block_size_ = block_data.size();
- char* block_data_copy = new char[block_size_];
- memcpy(block_data_copy, block_data.data(), block_size_);
- block_ = new Block(block_data_copy, block_size_, true /* take ownership */);
+ data_ = builder.Finish().ToString();
+ BlockContents contents;
+ contents.data = data_;
+ contents.cachable = false;
+ contents.heap_allocated = false;
+ block_ = new Block(contents);
return Status::OK();
}
- virtual size_t NumBytes() const { return block_size_; }
-
virtual Iterator* NewIterator() const {
return block_->NewIterator(comparator_);
}
private:
const Comparator* comparator_;
- int block_size_;
+ std::string data_;
Block* block_;
BlockConstructor();
@@ -253,7 +249,6 @@ class TableConstructor: public Constructor {
table_options.comparator = options.comparator;
return Table::Open(table_options, source_, sink.contents().size(), &table_);
}
- virtual size_t NumBytes() const { return source_->Size(); }
virtual Iterator* NewIterator() const {
return table_->NewIterator(ReadOptions());
@@ -342,10 +337,6 @@ class MemTableConstructor: public Constructor {
}
return Status::OK();
}
- virtual size_t NumBytes() const {
- return memtable_->ApproximateMemoryUsage();
- }
-
virtual Iterator* NewIterator() const {
return new KeyConvertingIterator(memtable_->NewIterator());
}
@@ -379,13 +370,6 @@ class DBConstructor: public Constructor {
}
return Status::OK();
}
- virtual size_t NumBytes() const {
- Range r("", "\xff\xff");
- uint64_t size;
- db_->GetApproximateSizes(&r, 1, &size);
- return size;
- }
-
virtual Iterator* NewIterator() const {
return db_->NewIterator(ReadOptions());
}
@@ -809,7 +793,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 611000));
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
}