author      Sanjay Ghemawat <sanjay@google.com>    2012-04-17 08:36:46 -0700
committer   Sanjay Ghemawat <sanjay@google.com>    2012-04-17 08:36:46 -0700
commit      85584d497e7b354853b72f450683d59fcf6b9c5c (patch)
tree        b6aad4f973f87487c3caa5600e7596219c79f645 /table
parent      bc1ee4d25e09b04e074db330a41f54ef4af0e31b (diff)
download    leveldb-85584d497e7b354853b72f450683d59fcf6b9c5c.tar.gz
Added bloom filter support. (tag: v1.4)
In particular, we add a new FilterPolicy class. An instance of this class
can be supplied in Options when opening a database. If supplied, the
instance is used to generate summaries of keys (e.g., a bloom filter)
which are placed in sstables. These summaries are consulted by DB::Get()
so we can avoid reading sstable blocks that are guaranteed not to contain
the key we are looking for.

This change provides one implementation of FilterPolicy based on bloom
filters.

Other changes:
- Updated version number to 1.4.
- Some build tweaks.
- C binding for CompactRange.
- A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom.
- Minor .gitignore update.
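As a usage sketch (not itself part of the table/ diff below; it relies on the new
include/leveldb/filter_policy.h and the Options::filter_policy field added elsewhere
in this change, and the database path is illustrative):

    #include <cassert>
    #include <string>
    #include "leveldb/db.h"
    #include "leveldb/filter_policy.h"

    int main() {
      leveldb::Options options;
      options.create_if_missing = true;
      // Roughly 10 bits per key; consulted by DB::Get() so it can skip
      // data blocks that cannot contain the key.
      options.filter_policy = leveldb::NewBloomFilterPolicy(10);

      leveldb::DB* db;
      leveldb::Status s = leveldb::DB::Open(options, "/tmp/filterdb", &db);
      assert(s.ok());

      std::string value;
      s = db->Get(leveldb::ReadOptions(), "some-key", &value);  // filter consulted here

      delete db;
      delete options.filter_policy;  // LevelDB does not take ownership of the policy
      return 0;
    }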
Diffstat (limited to 'table')
-rw-r--r--  table/block.cc                  9
-rw-r--r--  table/block.h                   5
-rw-r--r--  table/filter_block.cc       111
-rw-r--r--  table/filter_block.h         68
-rw-r--r--  table/filter_block_test.cc  128
-rw-r--r--  table/format.cc              23
-rw-r--r--  table/format.h               16
-rw-r--r--  table/table.cc              116
-rw-r--r--  table/table_builder.cc       55
-rw-r--r--  table/table_test.cc          32

10 files changed, 500 insertions(+), 63 deletions(-)
diff --git a/table/block.cc b/table/block.cc
index 06eb6f8..199d453 100644
--- a/table/block.cc
+++ b/table/block.cc
@@ -9,6 +9,7 @@
#include <vector>
#include <algorithm>
#include "leveldb/comparator.h"
+#include "table/format.h"
#include "util/coding.h"
#include "util/logging.h"
@@ -19,10 +20,10 @@ inline uint32_t Block::NumRestarts() const {
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
}
-Block::Block(const char* data, size_t size, bool take_ownership)
- : data_(data),
- size_(size),
- owned_(take_ownership) {
+Block::Block(const BlockContents& contents)
+ : data_(contents.data.data()),
+ size_(contents.data.size()),
+ owned_(contents.heap_allocated) {
if (size_ < sizeof(uint32_t)) {
size_ = 0; // Error marker
} else {
diff --git a/table/block.h b/table/block.h
index 76088a4..2493eb9 100644
--- a/table/block.h
+++ b/table/block.h
@@ -11,14 +11,13 @@
namespace leveldb {
+struct BlockContents;
class Comparator;
class Block {
public:
// Initialize the block with the specified contents.
- // Takes ownership of data[] and will delete[] it when done iff
- // "take_ownership is true.
- Block(const char* data, size_t size, bool take_ownership);
+ explicit Block(const BlockContents& contents);
~Block();
diff --git a/table/filter_block.cc b/table/filter_block.cc
new file mode 100644
index 0000000..203e15c
--- /dev/null
+++ b/table/filter_block.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/filter_block.h"
+
+#include "leveldb/filter_policy.h"
+#include "util/coding.h"
+
+namespace leveldb {
+
+// See doc/table_format.txt for an explanation of the filter block format.
+
+// Generate new filter every 2KB of data
+static const size_t kFilterBaseLg = 11;
+static const size_t kFilterBase = 1 << kFilterBaseLg;
+
+FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy)
+ : policy_(policy) {
+}
+
+void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
+ uint64_t filter_index = (block_offset / kFilterBase);
+ assert(filter_index >= filter_offsets_.size());
+ while (filter_index > filter_offsets_.size()) {
+ GenerateFilter();
+ }
+}
+
+void FilterBlockBuilder::AddKey(const Slice& key) {
+ Slice k = key;
+ start_.push_back(keys_.size());
+ keys_.append(k.data(), k.size());
+}
+
+Slice FilterBlockBuilder::Finish() {
+ if (!start_.empty()) {
+ GenerateFilter();
+ }
+
+ // Append array of per-filter offsets
+ const uint32_t array_offset = result_.size();
+ for (size_t i = 0; i < filter_offsets_.size(); i++) {
+ PutFixed32(&result_, filter_offsets_[i]);
+ }
+
+ PutFixed32(&result_, array_offset);
+ result_.push_back(kFilterBaseLg); // Save encoding parameter in result
+ return Slice(result_);
+}
+
+void FilterBlockBuilder::GenerateFilter() {
+ const size_t num_keys = start_.size();
+ if (num_keys == 0) {
+ // Fast path if there are no keys for this filter
+ filter_offsets_.push_back(result_.size());
+ return;
+ }
+
+ // Make list of keys from flattened key structure
+ start_.push_back(keys_.size()); // Simplify length computation
+ tmp_keys_.resize(num_keys);
+ for (size_t i = 0; i < num_keys; i++) {
+ const char* base = keys_.data() + start_[i];
+ size_t length = start_[i+1] - start_[i];
+ tmp_keys_[i] = Slice(base, length);
+ }
+
+ // Generate filter for current set of keys and append to result_.
+ filter_offsets_.push_back(result_.size());
+ policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_);
+
+ tmp_keys_.clear();
+ keys_.clear();
+ start_.clear();
+}
+
+FilterBlockReader::FilterBlockReader(const FilterPolicy* policy,
+ const Slice& contents)
+ : policy_(policy),
+ data_(NULL),
+ offset_(NULL),
+ num_(0),
+ base_lg_(0) {
+ size_t n = contents.size();
+ if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array
+ base_lg_ = contents[n-1];
+ uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
+ if (last_word > n - 5) return;
+ data_ = contents.data();
+ offset_ = data_ + last_word;
+ num_ = (n - 5 - last_word) / 4;
+}
+
+bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
+ uint64_t index = block_offset >> base_lg_;
+ if (index < num_) {
+ uint32_t start = DecodeFixed32(offset_ + index*4);
+ uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
+ if (start <= limit && limit <= (offset_ - data_)) {
+ Slice filter = Slice(data_ + start, limit - start);
+ return policy_->KeyMayMatch(key, filter);
+ } else if (start == limit) {
+ // Empty filters do not match any keys
+ return false;
+ }
+ }
+ return true; // Errors are treated as potential matches
+}
+
+}
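Taken together, FilterBlockBuilder::Finish() and FilterBlockReader decode the
following byte layout (a sketch reconstructed from the code above;
doc/table_format.txt, updated elsewhere in this change, is the authoritative
description):

    [filter 0]
    [filter 1]
    ...
    [filter N-1]

    [offset of filter 0]               : 4 bytes, fixed32
    [offset of filter 1]               : 4 bytes, fixed32
    ...
    [offset of filter N-1]             : 4 bytes, fixed32

    [offset of start of offset array]  : 4 bytes, fixed32
    lg(base), i.e. kFilterBaseLg       : 1 byte

Filter i holds the keys of all data blocks whose file offset falls in
[i*2048, (i+1)*2048), which is why KeyMayMatch() can locate the right filter
with block_offset >> base_lg_.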
diff --git a/table/filter_block.h b/table/filter_block.h
new file mode 100644
index 0000000..c67d010
--- /dev/null
+++ b/table/filter_block.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A filter block is stored near the end of a Table file. It contains
+// filters (e.g., bloom filters) for all data blocks in the table combined
+// into a single filter block.
+
+#ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
+#define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <vector>
+#include "leveldb/slice.h"
+#include "util/hash.h"
+
+namespace leveldb {
+
+class FilterPolicy;
+
+// A FilterBlockBuilder is used to construct all of the filters for a
+// particular Table. It generates a single string which is stored as
+// a special block in the Table.
+//
+// The sequence of calls to FilterBlockBuilder must match the regexp:
+// (StartBlock AddKey*)* Finish
+class FilterBlockBuilder {
+ public:
+ explicit FilterBlockBuilder(const FilterPolicy*);
+
+ void StartBlock(uint64_t block_offset);
+ void AddKey(const Slice& key);
+ Slice Finish();
+
+ private:
+ void GenerateFilter();
+
+ const FilterPolicy* policy_;
+ std::string keys_; // Flattened key contents
+ std::vector<size_t> start_; // Starting index in keys_ of each key
+ std::string result_; // Filter data computed so far
+ std::vector<Slice> tmp_keys_; // policy_->CreateFilter() argument
+ std::vector<uint32_t> filter_offsets_;
+
+ // No copying allowed
+ FilterBlockBuilder(const FilterBlockBuilder&);
+ void operator=(const FilterBlockBuilder&);
+};
+
+class FilterBlockReader {
+ public:
+ // REQUIRES: "contents" and *policy must stay live while *this is live.
+ FilterBlockReader(const FilterPolicy* policy, const Slice& contents);
+ bool KeyMayMatch(uint64_t block_offset, const Slice& key);
+
+ private:
+ const FilterPolicy* policy_;
+ const char* data_; // Pointer to filter data (at block-start)
+ const char* offset_; // Pointer to beginning of offset array (at block-end)
+ size_t num_; // Number of entries in offset array
+ size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
+};
+
+}
+
+#endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
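As a usage note, the call pattern that satisfies the (StartBlock AddKey*)* Finish
contract is exactly what TableBuilder (further down in this diff) produces. A minimal
sketch, with illustrative key names and an illustrative flush offset:

    FilterBlockBuilder fb(options.filter_policy);  // policy assumed non-NULL
    fb.StartBlock(0);                // before the first data block
    fb.AddKey("k1");                 // one AddKey() per key added to the table
    fb.AddKey("k2");
    fb.StartBlock(4096);             // after a data block is flushed at offset 4096
    fb.AddKey("k3");
    leveldb::Slice contents = fb.Finish();  // entire filter block, written uncompressed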
diff --git a/table/filter_block_test.cc b/table/filter_block_test.cc
new file mode 100644
index 0000000..3a2a07c
--- /dev/null
+++ b/table/filter_block_test.cc
@@ -0,0 +1,128 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/filter_block.h"
+
+#include "leveldb/filter_policy.h"
+#include "util/coding.h"
+#include "util/hash.h"
+#include "util/logging.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace leveldb {
+
+// For testing: emit an array with one hash value per key
+class TestHashFilter : public FilterPolicy {
+ public:
+ virtual const char* Name() const {
+ return "TestHashFilter";
+ }
+
+ virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
+ for (int i = 0; i < n; i++) {
+ uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
+ PutFixed32(dst, h);
+ }
+ }
+
+ virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
+ uint32_t h = Hash(key.data(), key.size(), 1);
+ for (int i = 0; i + 4 <= filter.size(); i += 4) {
+ if (h == DecodeFixed32(filter.data() + i)) {
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+class FilterBlockTest {
+ public:
+ TestHashFilter policy_;
+};
+
+TEST(FilterBlockTest, EmptyBuilder) {
+ FilterBlockBuilder builder(&policy_);
+ Slice block = builder.Finish();
+ ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));
+ FilterBlockReader reader(&policy_, block);
+ ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
+ ASSERT_TRUE(reader.KeyMayMatch(100000, "foo"));
+}
+
+TEST(FilterBlockTest, SingleChunk) {
+ FilterBlockBuilder builder(&policy_);
+ builder.StartBlock(100);
+ builder.AddKey("foo");
+ builder.AddKey("bar");
+ builder.AddKey("box");
+ builder.StartBlock(200);
+ builder.AddKey("box");
+ builder.StartBlock(300);
+ builder.AddKey("hello");
+ Slice block = builder.Finish();
+ FilterBlockReader reader(&policy_, block);
+ ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
+ ASSERT_TRUE(reader.KeyMayMatch(100, "bar"));
+ ASSERT_TRUE(reader.KeyMayMatch(100, "box"));
+ ASSERT_TRUE(reader.KeyMayMatch(100, "hello"));
+ ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
+ ASSERT_TRUE(! reader.KeyMayMatch(100, "missing"));
+ ASSERT_TRUE(! reader.KeyMayMatch(100, "other"));
+}
+
+TEST(FilterBlockTest, MultiChunk) {
+ FilterBlockBuilder builder(&policy_);
+
+ // First filter
+ builder.StartBlock(0);
+ builder.AddKey("foo");
+ builder.StartBlock(2000);
+ builder.AddKey("bar");
+
+ // Second filter
+ builder.StartBlock(3100);
+ builder.AddKey("box");
+
+ // Third filter is empty
+
+ // Last filter
+ builder.StartBlock(9000);
+ builder.AddKey("box");
+ builder.AddKey("hello");
+
+ Slice block = builder.Finish();
+ FilterBlockReader reader(&policy_, block);
+
+ // Check first filter
+ ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
+ ASSERT_TRUE(reader.KeyMayMatch(2000, "bar"));
+ ASSERT_TRUE(! reader.KeyMayMatch(0, "box"));
+ ASSERT_TRUE(! reader.KeyMayMatch(0, "hello"));
+
+ // Check second filter
+ ASSERT_TRUE(reader.KeyMayMatch(3100, "box"));
+ ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo"));
+ ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar"));
+ ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello"));
+
+ // Check third filter (empty)
+ ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo"));
+ ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar"));
+ ASSERT_TRUE(! reader.KeyMayMatch(4100, "box"));
+ ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello"));
+
+ // Check last filter
+ ASSERT_TRUE(reader.KeyMayMatch(9000, "box"));
+ ASSERT_TRUE(reader.KeyMayMatch(9000, "hello"));
+ ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo"));
+ ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar"));
+}
+
+} // namespace leveldb
+
+int main(int argc, char** argv) {
+ return leveldb::test::RunAllTests();
+}
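A quick worked check of the MultiChunk offsets against the 2KB filter base
(kFilterBaseLg == 11):

    0    >> 11 == 0   and   2000 >> 11 == 0   ->  "foo" and "bar" go into filter 0
    3100 >> 11 == 1                           ->  "box" goes into filter 1
    9000 >> 11 == 4                           ->  StartBlock(9000) emits empty filters 2 and 3
    4100 >> 11 == 2                           ->  the offset-4100 queries hit empty filter 2

so the "third filter" assertions exercise the start == limit (empty filter) branch
of FilterBlockReader::KeyMayMatch().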
diff --git a/table/format.cc b/table/format.cc
index 25b85a2..cda1dec 100644
--- a/table/format.cc
+++ b/table/format.cc
@@ -66,10 +66,10 @@ Status Footer::DecodeFrom(Slice* input) {
Status ReadBlock(RandomAccessFile* file,
const ReadOptions& options,
const BlockHandle& handle,
- Block** block,
- bool* may_cache) {
- *block = NULL;
- *may_cache = false;
+ BlockContents* result) {
+ result->data = Slice();
+ result->cachable = false;
+ result->heap_allocated = false;
// Read the block contents as well as the type/crc footer.
// See table_builder.cc for the code that built this structure.
@@ -105,11 +105,13 @@ Status ReadBlock(RandomAccessFile* file,
// Use it directly under the assumption that it will be live
// while the file is open.
delete[] buf;
- *block = new Block(data, n, false /* do not take ownership */);
- *may_cache = false; // Do not double-cache
+ result->data = Slice(data, n);
+ result->heap_allocated = false;
+ result->cachable = false; // Do not double-cache
} else {
- *block = new Block(buf, n, true /* take ownership */);
- *may_cache = true;
+ result->data = Slice(buf, n);
+ result->heap_allocated = true;
+ result->cachable = true;
}
// Ok
@@ -127,8 +129,9 @@ Status ReadBlock(RandomAccessFile* file,
return Status::Corruption("corrupted compressed block contents");
}
delete[] buf;
- *block = new Block(ubuf, ulength, true /* take ownership */);
- *may_cache = true;
+ result->data = Slice(ubuf, ulength);
+ result->heap_allocated = true;
+ result->cachable = true;
break;
}
default:
diff --git a/table/format.h b/table/format.h
index 66a15da..6c0b80c 100644
--- a/table/format.h
+++ b/table/format.h
@@ -83,16 +83,18 @@ static const uint64_t kTableMagicNumber = 0xdb4775248b80fb57ull;
// 1-byte type + 32-bit crc
static const size_t kBlockTrailerSize = 5;
-// Read the block identified by "handle" from "file". On success,
-// store a pointer to the heap-allocated result in *block and return
-// OK. On failure store NULL in *block and return non-OK.
-// On success, stores true in *may_cache if the result may be
-// cached, false if it must not be cached.
+struct BlockContents {
+ Slice data; // Actual contents of data
+ bool cachable; // True iff data can be cached
+ bool heap_allocated; // True iff caller should delete[] data.data()
+};
+
+// Read the block identified by "handle" from "file". On failure
+// return non-OK. On success fill *result and return OK.
extern Status ReadBlock(RandomAccessFile* file,
const ReadOptions& options,
const BlockHandle& handle,
- Block** block,
- bool* may_cache);
+ BlockContents* result);
// Implementation details follow. Clients should ignore,
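The new calling convention, sketched below as it appears from inside namespace
leveldb with "file" and "handle" assumed in scope (this mirrors Table::Open() and
Table::BlockReader() in the next file):

    BlockContents contents;
    Status s = ReadBlock(file, ReadOptions(), handle, &contents);
    if (s.ok()) {
      Block* block = new Block(contents);   // Block frees the buffer iff heap_allocated
      bool may_cache = contents.cachable;   // false when the file returned its own
                                            // buffer (e.g., mmap), to avoid double-caching
    }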
diff --git a/table/table.cc b/table/table.cc
index 07dcffd..dbd6d3a 100644
--- a/table/table.cc
+++ b/table/table.cc
@@ -5,8 +5,12 @@
#include "leveldb/table.h"
#include "leveldb/cache.h"
+#include "leveldb/comparator.h"
#include "leveldb/env.h"
+#include "leveldb/filter_policy.h"
+#include "leveldb/options.h"
#include "table/block.h"
+#include "table/filter_block.h"
#include "table/format.h"
#include "table/two_level_iterator.h"
#include "util/coding.h"
@@ -15,6 +19,8 @@ namespace leveldb {
struct Table::Rep {
~Rep() {
+ delete filter;
+ delete [] filter_data;
delete index_block;
}
@@ -22,6 +28,8 @@ struct Table::Rep {
Status status;
RandomAccessFile* file;
uint64_t cache_id;
+ FilterBlockReader* filter;
+ const char* filter_data;
BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
Block* index_block;
@@ -47,11 +55,13 @@ Status Table::Open(const Options& options,
if (!s.ok()) return s;
// Read the index block
+ BlockContents contents;
Block* index_block = NULL;
if (s.ok()) {
- bool may_cache; // Ignored result
- s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block,
- &may_cache);
+ s = ReadBlock(file, ReadOptions(), footer.index_handle(), &contents);
+ if (s.ok()) {
+ index_block = new Block(contents);
+ }
}
if (s.ok()) {
@@ -63,7 +73,10 @@ Status Table::Open(const Options& options,
rep->metaindex_handle = footer.metaindex_handle();
rep->index_block = index_block;
rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
+ rep->filter_data = NULL;
+ rep->filter = NULL;
*table = new Table(rep);
+ (*table)->ReadMeta(footer);
} else {
if (index_block) delete index_block;
}
@@ -71,6 +84,52 @@ Status Table::Open(const Options& options,
return s;
}
+void Table::ReadMeta(const Footer& footer) {
+ if (rep_->options.filter_policy == NULL) {
+ return; // Do not need any metadata
+ }
+
+ // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
+ // it is an empty block.
+ ReadOptions opt;
+ BlockContents contents;
+ if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
+ // Do not propagate errors since meta info is not needed for operation
+ return;
+ }
+ Block* meta = new Block(contents);
+
+ Iterator* iter = meta->NewIterator(BytewiseComparator());
+ std::string key = "filter.";
+ key.append(rep_->options.filter_policy->Name());
+ iter->Seek(key);
+ if (iter->Valid() && iter->key() == Slice(key)) {
+ ReadFilter(iter->value());
+ }
+ delete iter;
+ delete meta;
+}
+
+void Table::ReadFilter(const Slice& filter_handle_value) {
+ Slice v = filter_handle_value;
+ BlockHandle filter_handle;
+ if (!filter_handle.DecodeFrom(&v).ok()) {
+ return;
+ }
+
+ // We might want to unify with ReadBlock() if we start
+ // requiring checksum verification in Table::Open.
+ ReadOptions opt;
+ BlockContents block;
+ if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
+ return;
+ }
+ if (block.heap_allocated) {
+ rep_->filter_data = block.data.data(); // Will need to delete later
+ }
+ rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
+}
+
Table::~Table() {
delete rep_;
}
@@ -107,7 +166,7 @@ Iterator* Table::BlockReader(void* arg,
// can add more features in the future.
if (s.ok()) {
- bool may_cache;
+ BlockContents contents;
if (block_cache != NULL) {
char cache_key_buffer[16];
EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
@@ -117,14 +176,20 @@ Iterator* Table::BlockReader(void* arg,
if (cache_handle != NULL) {
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
} else {
- s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
- if (s.ok() && may_cache && options.fill_cache) {
- cache_handle = block_cache->Insert(
- key, block, block->size(), &DeleteCachedBlock);
+ s = ReadBlock(table->rep_->file, options, handle, &contents);
+ if (s.ok()) {
+ block = new Block(contents);
+ if (contents.cachable && options.fill_cache) {
+ cache_handle = block_cache->Insert(
+ key, block, block->size(), &DeleteCachedBlock);
+ }
}
}
} else {
- s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
+ s = ReadBlock(table->rep_->file, options, handle, &contents);
+ if (s.ok()) {
+ block = new Block(contents);
+ }
}
}
@@ -148,6 +213,39 @@ Iterator* Table::NewIterator(const ReadOptions& options) const {
&Table::BlockReader, const_cast<Table*>(this), options);
}
+Status Table::InternalGet(const ReadOptions& options, const Slice& k,
+ void* arg,
+ void (*saver)(void*, const Slice&, const Slice&)) {
+ Status s;
+ Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
+ iiter->Seek(k);
+ if (iiter->Valid()) {
+ Slice handle_value = iiter->value();
+ FilterBlockReader* filter = rep_->filter;
+ BlockHandle handle;
+ if (filter != NULL &&
+ handle.DecodeFrom(&handle_value).ok() &&
+ !filter->KeyMayMatch(handle.offset(), k)) {
+ // Not found
+ } else {
+ Slice handle = iiter->value();
+ Iterator* block_iter = BlockReader(this, options, iiter->value());
+ block_iter->Seek(k);
+ if (block_iter->Valid()) {
+ (*saver)(arg, block_iter->key(), block_iter->value());
+ }
+ s = block_iter->status();
+ delete block_iter;
+ }
+ }
+ if (s.ok()) {
+ s = iiter->status();
+ }
+ delete iiter;
+ return s;
+}
+
+
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
Iterator* index_iter =
rep_->index_block->NewIterator(rep_->options.comparator);
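InternalGet() reports hits through the caller-supplied saver function pointer; the
real consumer lives under db/ (outside this table/-only view), but a hypothetical
saver has roughly this shape (names and fields here are assumptions, not part of
this diff):

    struct GetState {
      std::string* value;
      bool found;
    };

    static void SaveValue(void* arg, const leveldb::Slice& k, const leveldb::Slice& v) {
      GetState* state = reinterpret_cast<GetState*>(arg);
      // A DB-level saver would also parse the internal key in "k" and check the
      // user key and value type before accepting the entry; omitted here.
      state->value->assign(v.data(), v.size());
      state->found = true;
    }

For keys the filter rules out, the saver is never invoked and no data block is read.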
diff --git a/table/table_builder.cc b/table/table_builder.cc
index 682ce5b..62002c8 100644
--- a/table/table_builder.cc
+++ b/table/table_builder.cc
@@ -5,14 +5,15 @@
#include "leveldb/table_builder.h"
#include <assert.h>
-#include <stdio.h>
#include "leveldb/comparator.h"
#include "leveldb/env.h"
+#include "leveldb/filter_policy.h"
+#include "leveldb/options.h"
#include "table/block_builder.h"
+#include "table/filter_block.h"
#include "table/format.h"
#include "util/coding.h"
#include "util/crc32c.h"
-#include "util/logging.h"
namespace leveldb {
@@ -27,6 +28,7 @@ struct TableBuilder::Rep {
std::string last_key;
int64_t num_entries;
bool closed; // Either Finish() or Abandon() has been called.
+ FilterBlockBuilder* filter_block;
// We do not emit the index entry for a block until we have seen the
// first key for the next data block. This allows us to use shorter
@@ -51,6 +53,8 @@ struct TableBuilder::Rep {
index_block(&index_block_options),
num_entries(0),
closed(false),
+ filter_block(opt.filter_policy == NULL ? NULL
+ : new FilterBlockBuilder(opt.filter_policy)),
pending_index_entry(false) {
index_block_options.block_restart_interval = 1;
}
@@ -58,10 +62,14 @@ struct TableBuilder::Rep {
TableBuilder::TableBuilder(const Options& options, WritableFile* file)
: rep_(new Rep(options, file)) {
+ if (rep_->filter_block != NULL) {
+ rep_->filter_block->StartBlock(0);
+ }
}
TableBuilder::~TableBuilder() {
assert(rep_->closed); // Catch errors where caller forgot to call Finish()
+ delete rep_->filter_block;
delete rep_;
}
@@ -98,6 +106,10 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
r->pending_index_entry = false;
}
+ if (r->filter_block != NULL) {
+ r->filter_block->AddKey(key);
+ }
+
r->last_key.assign(key.data(), key.size());
r->num_entries++;
r->data_block.Add(key, value);
@@ -119,6 +131,9 @@ void TableBuilder::Flush() {
r->pending_index_entry = true;
r->status = r->file->Flush();
}
+ if (r->filter_block != NULL) {
+ r->filter_block->StartBlock(r->offset);
+ }
}
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
@@ -152,6 +167,15 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
break;
}
}
+ WriteRawBlock(block_contents, type, handle);
+ r->compressed_output.clear();
+ block->Reset();
+}
+
+void TableBuilder::WriteRawBlock(const Slice& block_contents,
+ CompressionType type,
+ BlockHandle* handle) {
+ Rep* r = rep_;
handle->set_offset(r->offset);
handle->set_size(block_contents.size());
r->status = r->file->Append(block_contents);
@@ -166,8 +190,6 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
r->offset += block_contents.size() + kBlockTrailerSize;
}
}
- r->compressed_output.clear();
- block->Reset();
}
Status TableBuilder::status() const {
@@ -179,13 +201,32 @@ Status TableBuilder::Finish() {
Flush();
assert(!r->closed);
r->closed = true;
- BlockHandle metaindex_block_handle;
- BlockHandle index_block_handle;
+
+ BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle;
+
+ // Write filter block
+ if (ok() && r->filter_block != NULL) {
+ WriteRawBlock(r->filter_block->Finish(), kNoCompression,
+ &filter_block_handle);
+ }
+
+ // Write metaindex block
if (ok()) {
BlockBuilder meta_index_block(&r->options);
+ if (r->filter_block != NULL) {
+ // Add mapping from "filter.Name" to location of filter data
+ std::string key = "filter.";
+ key.append(r->options.filter_policy->Name());
+ std::string handle_encoding;
+ filter_block_handle.EncodeTo(&handle_encoding);
+ meta_index_block.Add(key, handle_encoding);
+ }
+
// TODO(postrelease): Add stats and other meta blocks
WriteBlock(&meta_index_block, &metaindex_block_handle);
}
+
+ // Write index block
if (ok()) {
if (r->pending_index_entry) {
r->options.comparator->FindShortSuccessor(&r->last_key);
@@ -196,6 +237,8 @@ Status TableBuilder::Finish() {
}
WriteBlock(&r->index_block, &index_block_handle);
}
+
+ // Write footer
if (ok()) {
Footer footer;
footer.set_metaindex_handle(metaindex_block_handle);
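For completeness, a sketch of driving TableBuilder directly with a filter policy
configured (file path and error handling are illustrative; keys must be added in
the order defined by options.comparator). With a filter policy set, Finish() now
emits data blocks, the filter block (kNoCompression), a metaindex block mapping
"filter.<PolicyName>" to the filter block's handle, the index block, and the footer:

    leveldb::Options opt;
    opt.filter_policy = leveldb::NewBloomFilterPolicy(10);

    leveldb::WritableFile* file = NULL;
    leveldb::Status s = leveldb::Env::Default()->NewWritableFile("/tmp/example.ldb", &file);
    if (s.ok()) {
      leveldb::TableBuilder builder(opt, file);
      builder.Add("apple", "red");        // sorted order required
      builder.Add("banana", "yellow");
      s = builder.Finish();               // writes filter + metaindex + index + footer
      file->Close();
    }
    delete file;
    delete opt.filter_policy;             // caller owns the policy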
diff --git a/table/table_test.cc b/table/table_test.cc
index 0c8e676..57cea25 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -168,8 +168,6 @@ class Constructor {
// Construct the data structure from the data in "data"
virtual Status FinishImpl(const Options& options, const KVMap& data) = 0;
- virtual size_t NumBytes() const = 0;
-
virtual Iterator* NewIterator() const = 0;
virtual const KVMap& data() { return data_; }
@@ -185,7 +183,6 @@ class BlockConstructor: public Constructor {
explicit BlockConstructor(const Comparator* cmp)
: Constructor(cmp),
comparator_(cmp),
- block_size_(-1),
block_(NULL) { }
~BlockConstructor() {
delete block_;
@@ -201,22 +198,21 @@ class BlockConstructor: public Constructor {
builder.Add(it->first, it->second);
}
// Open the block
- Slice block_data = builder.Finish();
- block_size_ = block_data.size();
- char* block_data_copy = new char[block_size_];
- memcpy(block_data_copy, block_data.data(), block_size_);
- block_ = new Block(block_data_copy, block_size_, true /* take ownership */);
+ data_ = builder.Finish().ToString();
+ BlockContents contents;
+ contents.data = data_;
+ contents.cachable = false;
+ contents.heap_allocated = false;
+ block_ = new Block(contents);
return Status::OK();
}
- virtual size_t NumBytes() const { return block_size_; }
-
virtual Iterator* NewIterator() const {
return block_->NewIterator(comparator_);
}
private:
const Comparator* comparator_;
- int block_size_;
+ std::string data_;
Block* block_;
BlockConstructor();
@@ -253,7 +249,6 @@ class TableConstructor: public Constructor {
table_options.comparator = options.comparator;
return Table::Open(table_options, source_, sink.contents().size(), &table_);
}
- virtual size_t NumBytes() const { return source_->Size(); }
virtual Iterator* NewIterator() const {
return table_->NewIterator(ReadOptions());
@@ -342,10 +337,6 @@ class MemTableConstructor: public Constructor {
}
return Status::OK();
}
- virtual size_t NumBytes() const {
- return memtable_->ApproximateMemoryUsage();
- }
-
virtual Iterator* NewIterator() const {
return new KeyConvertingIterator(memtable_->NewIterator());
}
@@ -379,13 +370,6 @@ class DBConstructor: public Constructor {
}
return Status::OK();
}
- virtual size_t NumBytes() const {
- Range r("", "\xff\xff");
- uint64_t size;
- db_->GetApproximateSizes(&r, 1, &size);
- return size;
- }
-
virtual Iterator* NewIterator() const {
return db_->NewIterator(ReadOptions());
}
@@ -809,7 +793,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 611000));
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
}