summary refs log tree commit diff
path: root/db
diff options
context:
space:
mode:
Diffstat (limited to 'db')
-rw-r--r--db/builder.cc4
-rw-r--r--db/db_bench.cc4
-rw-r--r--db/db_impl.cc4
-rw-r--r--db/db_iter.cc5
-rw-r--r--db/log_reader.cc6
-rw-r--r--db/log_writer.cc11
-rw-r--r--db/repair.cc2
-rw-r--r--db/table_cache.cc3
-rw-r--r--db/table_cache.h15
-rw-r--r--db/version_set.cc22
10 files changed, 45 insertions, 31 deletions
diff --git a/db/builder.cc b/db/builder.cc
index f3d0fe2..d5585c3 100644
--- a/db/builder.cc
+++ b/db/builder.cc
@@ -74,7 +74,9 @@ Status BuildTable(const std::string& dbname,
if (s.ok()) {
// Verify that the table is usable
- Iterator* it = table_cache->NewIterator(ReadOptions(), meta->number);
+ Iterator* it = table_cache->NewIterator(ReadOptions(),
+ meta->number,
+ meta->file_size);
s = it->status();
delete it;
}
diff --git a/db/db_bench.cc b/db/db_bench.cc
index 7026ca1..c7a662d 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -354,7 +354,7 @@ class Benchmark {
private:
void Crc32c(int size, const char* label) {
// Checksum about 500MB of data total
- string data(size, 'x');
+ std::string data(size, 'x');
int64_t bytes = 0;
uint32_t crc = 0;
while (bytes < 500 * 1048576) {
@@ -371,7 +371,7 @@ class Benchmark {
void SHA1(int size, const char* label) {
// SHA1 about 100MB of data total
- string data(size, 'x');
+ std::string data(size, 'x');
int64_t bytes = 0;
char sha1[20];
while (bytes < 100 * 1048576) {
diff --git a/db/db_impl.cc b/db/db_impl.cc
index 12c02b3..f14167a 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -642,7 +642,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
if (s.ok() && current_entries > 0) {
// Verify that the table is usable
- Iterator* iter = table_cache_->NewIterator(ReadOptions(),output_number);
+ Iterator* iter = table_cache_->NewIterator(ReadOptions(),
+ output_number,
+ current_bytes);
s = iter->status();
delete iter;
if (s.ok()) {
diff --git a/db/db_iter.cc b/db/db_iter.cc
index 6726b51..beb4d74 100644
--- a/db/db_iter.cc
+++ b/db/db_iter.cc
@@ -340,8 +340,11 @@ void DBIter::ReadIndirectValue(Slice ref) const {
std::string fname = LargeValueFileName(*dbname_, large_ref);
RandomAccessFile* file;
Status s = env_->NewRandomAccessFile(fname, &file);
+ uint64_t file_size = 0;
+ if (s.ok()) {
+ s = env_->GetFileSize(fname, &file_size);
+ }
if (s.ok()) {
- uint64_t file_size = file->Size();
uint64_t value_size = large_ref.ValueSize();
large_->value.resize(value_size);
Slice result;
diff --git a/db/log_reader.cc b/db/log_reader.cc
index 39a6d2b..407700d 100644
--- a/db/log_reader.cc
+++ b/db/log_reader.cc
@@ -105,7 +105,7 @@ void Reader::ReportDrop(size_t bytes, const char* reason) {
unsigned int Reader::ReadPhysicalRecord(Slice* result) {
while (true) {
- if (buffer_.size() <= kHeaderSize) {
+ if (buffer_.size() < kHeaderSize) {
if (!eof_) {
// Last read was a full read, so this is a trailer to skip
buffer_.clear();
@@ -124,12 +124,10 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result) {
} else if (buffer_.size() == 0) {
// End of file
return kEof;
- } else if (buffer_.size() < kHeaderSize) {
+ } else {
ReportDrop(buffer_.size(), "truncated record at end of file");
buffer_.clear();
return kEof;
- } else {
- // We have a trailing zero-length record. Fall through and check it.
}
}
diff --git a/db/log_writer.cc b/db/log_writer.cc
index 465eca2..fc33e6e 100644
--- a/db/log_writer.cc
+++ b/db/log_writer.cc
@@ -35,18 +35,19 @@ Status Writer::AddRecord(const Slice& slice) {
do {
const int leftover = kBlockSize - block_offset_;
assert(leftover >= 0);
- if (leftover <= kHeaderSize) {
+ if (leftover < kHeaderSize) {
// Switch to a new block
if (leftover > 0) {
- // Fill the trailer
- dest_->Append(Slice("\x00\x00\x00\x00\x00\x00\x00", leftover));
+ // Fill the trailer (literal below relies on kHeaderSize being 7)
+ assert(kHeaderSize == 7);
+ dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
}
block_offset_ = 0;
}
- // Invariant: we never leave <= kHeaderSize bytes in a block.
+ // Invariant: we never leave < kHeaderSize bytes in a block.
const int avail = kBlockSize - block_offset_ - kHeaderSize;
- assert(avail > 0);
+ assert(avail >= 0);
const size_t fragment_length = (left < avail) ? left : avail;
diff --git a/db/repair.cc b/db/repair.cc
index 0727914..745b31a 100644
--- a/db/repair.cc
+++ b/db/repair.cc
@@ -261,7 +261,7 @@ class Repairer {
Status status = env_->GetFileSize(fname, &t->meta.file_size);
if (status.ok()) {
Iterator* iter = table_cache_->NewIterator(
- ReadOptions(), t->meta.number);
+ ReadOptions(), t->meta.number, t->meta.file_size);
bool empty = true;
ParsedInternalKey parsed;
t->max_sequence = 0;
diff --git a/db/table_cache.cc b/db/table_cache.cc
index 604298d..6f750d6 100644
--- a/db/table_cache.cc
+++ b/db/table_cache.cc
@@ -44,6 +44,7 @@ TableCache::~TableCache() {
Iterator* TableCache::NewIterator(const ReadOptions& options,
uint64_t file_number,
+ uint64_t file_size,
Table** tableptr) {
if (tableptr != NULL) {
*tableptr = NULL;
@@ -59,7 +60,7 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
Table* table = NULL;
Status s = env_->NewRandomAccessFile(fname, &file);
if (s.ok()) {
- s = Table::Open(*options_, file, &table);
+ s = Table::Open(*options_, file, file_size, &table);
}
if (!s.ok()) {
diff --git a/db/table_cache.h b/db/table_cache.h
index 6c357df..5564dfc 100644
--- a/db/table_cache.h
+++ b/db/table_cache.h
@@ -23,15 +23,16 @@ class TableCache {
TableCache(const std::string& dbname, const Options* options, int entries);
~TableCache();
- // Get an iterator for the specified file number and return it. If
- // "tableptr" is non-NULL, also sets "*tableptr" to point to the
- // Table object underlying the returned iterator, or NULL if no
- // Table object underlies the returned iterator. The returned
- // "*tableptr" object is owned by the cache and should not be
- // deleted, and is valid for as long as the returned iterator is
- // live.
+ // Return an iterator for the specified file number (the corresponding
+ // file length must be exactly "file_size" bytes). If "tableptr" is
+ // non-NULL, also sets "*tableptr" to point to the Table object
+ // underlying the returned iterator, or NULL if no Table object underlies
+ // the returned iterator. The returned "*tableptr" object is owned by
+ // the cache and should not be deleted, and is valid for as long as the
+ // returned iterator is live.
Iterator* NewIterator(const ReadOptions& options,
uint64_t file_number,
+ uint64_t file_size,
Table** tableptr = NULL);
// Evict any entry for the specified file number
diff --git a/db/version_set.cc b/db/version_set.cc
index caf0b2d..b826e5b 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -75,8 +75,8 @@ Version::~Version() {
// An internal iterator. For a given version/level pair, yields
// information about the files in the level. For a given entry, key()
// is the largest key that occurs in the file, and value() is an
-// 8-byte value containing the file number of the file, encoding using
-// EncodeFixed64.
+// 16-byte value containing the file number and file size, both
+// encoded using EncodeFixed64.
class Version::LevelFileNumIterator : public Iterator {
public:
LevelFileNumIterator(const Version* version,
@@ -129,6 +129,7 @@ class Version::LevelFileNumIterator : public Iterator {
Slice value() const {
assert(Valid());
EncodeFixed64(value_buf_, (*flist_)[index_]->number);
+ EncodeFixed64(value_buf_+8, (*flist_)[index_]->file_size);
return Slice(value_buf_, sizeof(value_buf_));
}
virtual Status status() const { return Status::OK(); }
@@ -137,18 +138,21 @@ class Version::LevelFileNumIterator : public Iterator {
const std::vector<FileMetaData*>* const flist_;
int index_;
- mutable char value_buf_[8]; // Used for encoding the file number for value()
+ // Backing store for value(). Holds the file number and size.
+ mutable char value_buf_[16];
};
static Iterator* GetFileIterator(void* arg,
const ReadOptions& options,
const Slice& file_value) {
TableCache* cache = reinterpret_cast<TableCache*>(arg);
- if (file_value.size() != 8) {
+ if (file_value.size() != 16) {
return NewErrorIterator(
Status::Corruption("FileReader invoked with unexpected value"));
} else {
- return cache->NewIterator(options, DecodeFixed64(file_value.data()));
+ return cache->NewIterator(options,
+ DecodeFixed64(file_value.data()),
+ DecodeFixed64(file_value.data() + 8));
}
}
@@ -164,7 +168,8 @@ void Version::AddIterators(const ReadOptions& options,
// Merge all level zero files together since they may overlap
for (int i = 0; i < files_[0].size(); i++) {
iters->push_back(
- vset_->table_cache_->NewIterator(options, files_[0][i]->number));
+ vset_->table_cache_->NewIterator(
+ options, files_[0][i]->number, files_[0][i]->file_size));
}
// For levels > 0, we can use a concatenating iterator that sequentially
@@ -650,7 +655,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
// approximate offset of "ikey" within the table.
Table* tableptr;
Iterator* iter = table_cache_->NewIterator(
- ReadOptions(), files[i]->number, &tableptr);
+ ReadOptions(), files[i]->number, files[i]->file_size, &tableptr);
if (tableptr != NULL) {
result += tableptr->ApproximateOffsetOf(ikey.Encode());
}
@@ -855,7 +860,8 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
if (c->level() + which == 0) {
const std::vector<FileMetaData*>& files = c->inputs_[which];
for (int i = 0; i < files.size(); i++) {
- list[num++] = table_cache_->NewIterator(options, files[i]->number);
+ list[num++] = table_cache_->NewIterator(
+ options, files[i]->number, files[i]->file_size);
}
} else {
// Create concatenating iterator for the files from this level