diff options
author | David Grogan <dgrogan@chromium.org> | 2013-12-10 10:36:31 -0800 |
---|---|---|
committer | David Grogan <dgrogan@chromium.org> | 2013-12-10 10:36:31 -0800 |
commit | 0cfb990d58ffba9b56df6e9829ddb9d220824066 (patch) | |
tree | 3fbb7b8fae37a78ffa9991cb0b57e3dbbb24e373 /util | |
parent | 0b9a89f40efdd143fa1426e7d5cd997f67ba6361 (diff) | |
download | leveldb-0cfb990d58ffba9b56df6e9829ddb9d220824066.tar.gz |
Release LevelDB 1.15v1.15
- switched from mmap based writing to simpler stdio based writing. Has a
minor impact (0.5 microseconds) on microbenchmarks for asynchronous
writes. Synchronous writes speed up from 30ms to 10ms on linux/ext4.
Should be much more reliable on diverse platforms.
- compaction errors now immediately put the database into a read-only
mode (until it is re-opened). As a downside, a disk going out of
space and then space being created will require a re-open to recover
from, whereas previously that would happen automatically. On the
plus side, many corruption possibilities go away.
- force the DB to enter an error-state so that all future writes fail
when a synchronous log write succeeds but the sync fails.
- repair now regenerates sstables that exhibit problems
- fix issue 218 - Use native memory barriers on OSX
- fix issue 212 - QNX build is broken
- fix build on iOS with xcode 5
- make tests compile and pass on windows
Diffstat (limited to 'util')
-rw-r--r-- | util/arena.h | 2 | ||||
-rw-r--r-- | util/arena_test.cc | 6 | ||||
-rw-r--r-- | util/bloom_test.cc | 3 | ||||
-rw-r--r-- | util/coding_test.cc | 8 | ||||
-rw-r--r-- | util/env_posix.cc | 176 | ||||
-rw-r--r-- | util/testharness.cc | 2 | ||||
-rw-r--r-- | util/testutil.cc | 2 | ||||
-rw-r--r-- | util/testutil.h | 2 |
8 files changed, 40 insertions, 161 deletions
diff --git a/util/arena.h b/util/arena.h index 8f7dde2..73bbf1c 100644 --- a/util/arena.h +++ b/util/arena.h @@ -5,9 +5,9 @@ #ifndef STORAGE_LEVELDB_UTIL_ARENA_H_ #define STORAGE_LEVELDB_UTIL_ARENA_H_ -#include <cstddef> #include <vector> #include <assert.h> +#include <stddef.h> #include <stdint.h> namespace leveldb { diff --git a/util/arena_test.cc b/util/arena_test.cc index 63d1778..58e870e 100644 --- a/util/arena_test.cc +++ b/util/arena_test.cc @@ -40,7 +40,7 @@ TEST(ArenaTest, Simple) { r = arena.Allocate(s); } - for (int b = 0; b < s; b++) { + for (size_t b = 0; b < s; b++) { // Fill the "i"th allocation with a known bit pattern r[b] = i % 256; } @@ -51,10 +51,10 @@ TEST(ArenaTest, Simple) { ASSERT_LE(arena.MemoryUsage(), bytes * 1.10); } } - for (int i = 0; i < allocated.size(); i++) { + for (size_t i = 0; i < allocated.size(); i++) { size_t num_bytes = allocated[i].first; const char* p = allocated[i].second; - for (int b = 0; b < num_bytes; b++) { + for (size_t b = 0; b < num_bytes; b++) { // Check the "i"th allocation for the known bit pattern ASSERT_EQ(int(p[b]) & 0xff, i % 256); } diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 0bf8e8d..77fb1b3 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -126,7 +126,8 @@ TEST(BloomTest, VaryingLengths) { } Build(); - ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length; + ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40)) + << length; // All added keys must match for (int i = 0; i < length; i++) { diff --git a/util/coding_test.cc b/util/coding_test.cc index fb5726e..521541e 100644 --- a/util/coding_test.cc +++ b/util/coding_test.cc @@ -112,13 +112,13 @@ TEST(Coding, Varint64) { } std::string s; - for (int i = 0; i < values.size(); i++) { + for (size_t i = 0; i < values.size(); i++) { PutVarint64(&s, values[i]); } const char* p = s.data(); const char* limit = p + s.size(); - for (int i = 0; i < values.size(); i++) { + for (size_t i = 0; i < values.size(); i++) { ASSERT_TRUE(p < limit); uint64_t actual; const char* start = p; @@ -143,7 +143,7 @@ TEST(Coding, Varint32Truncation) { std::string s; PutVarint32(&s, large_value); uint32_t result; - for (int len = 0; len < s.size() - 1; len++) { + for (size_t len = 0; len < s.size() - 1; len++) { ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + len, &result) == NULL); } ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + s.size(), &result) != NULL); @@ -162,7 +162,7 @@ TEST(Coding, Varint64Truncation) { std::string s; PutVarint64(&s, large_value); uint64_t result; - for (int len = 0; len < s.size() - 1; len++) { + for (size_t len = 0; len < s.size() - 1; len++) { ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + len, &result) == NULL); } ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + s.size(), &result) != NULL); diff --git a/util/env_posix.cc b/util/env_posix.cc index 3e2925d..e1cbebd 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -175,147 +175,43 @@ class PosixMmapReadableFile: public RandomAccessFile { } }; -// We preallocate up to an extra megabyte and use memcpy to append new -// data to the file. This is safe since we either properly close the -// file before reading from it, or for log files, the reading code -// knows enough to skip zero suffixes. -class PosixMmapFile : public WritableFile { +class PosixWritableFile : public WritableFile { private: std::string filename_; - int fd_; - size_t page_size_; - size_t map_size_; // How much extra memory to map at a time - char* base_; // The mapped region - char* limit_; // Limit of the mapped region - char* dst_; // Where to write next (in range [base_,limit_]) - char* last_sync_; // Where have we synced up to - uint64_t file_offset_; // Offset of base_ in file - - // Have we done an munmap of unsynced data? - bool pending_sync_; - - // Roundup x to a multiple of y - static size_t Roundup(size_t x, size_t y) { - return ((x + y - 1) / y) * y; - } - - size_t TruncateToPageBoundary(size_t s) { - s -= (s & (page_size_ - 1)); - assert((s % page_size_) == 0); - return s; - } - - bool UnmapCurrentRegion() { - bool result = true; - if (base_ != NULL) { - if (last_sync_ < limit_) { - // Defer syncing this data until next Sync() call, if any - pending_sync_ = true; - } - if (munmap(base_, limit_ - base_) != 0) { - result = false; - } - file_offset_ += limit_ - base_; - base_ = NULL; - limit_ = NULL; - last_sync_ = NULL; - dst_ = NULL; - - // Increase the amount we map the next time, but capped at 1MB - if (map_size_ < (1<<20)) { - map_size_ *= 2; - } - } - return result; - } - - bool MapNewRegion() { - assert(base_ == NULL); - if (ftruncate(fd_, file_offset_ + map_size_) < 0) { - return false; - } - void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_, file_offset_); - if (ptr == MAP_FAILED) { - return false; - } - base_ = reinterpret_cast<char*>(ptr); - limit_ = base_ + map_size_; - dst_ = base_; - last_sync_ = base_; - return true; - } + FILE* file_; public: - PosixMmapFile(const std::string& fname, int fd, size_t page_size) - : filename_(fname), - fd_(fd), - page_size_(page_size), - map_size_(Roundup(65536, page_size)), - base_(NULL), - limit_(NULL), - dst_(NULL), - last_sync_(NULL), - file_offset_(0), - pending_sync_(false) { - assert((page_size & (page_size - 1)) == 0); - } - - - ~PosixMmapFile() { - if (fd_ >= 0) { - PosixMmapFile::Close(); + PosixWritableFile(const std::string& fname, FILE* f) + : filename_(fname), file_(f) { } + + ~PosixWritableFile() { + if (file_ != NULL) { + // Ignoring any potential errors + fclose(file_); } } virtual Status Append(const Slice& data) { - const char* src = data.data(); - size_t left = data.size(); - while (left > 0) { - assert(base_ <= dst_); - assert(dst_ <= limit_); - size_t avail = limit_ - dst_; - if (avail == 0) { - if (!UnmapCurrentRegion() || - !MapNewRegion()) { - return IOError(filename_, errno); - } - } - - size_t n = (left <= avail) ? left : avail; - memcpy(dst_, src, n); - dst_ += n; - src += n; - left -= n; + size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_); + if (r != data.size()) { + return IOError(filename_, errno); } return Status::OK(); } virtual Status Close() { - Status s; - size_t unused = limit_ - dst_; - if (!UnmapCurrentRegion()) { - s = IOError(filename_, errno); - } else if (unused > 0) { - // Trim the extra space at the end of the file - if (ftruncate(fd_, file_offset_ - unused) < 0) { - s = IOError(filename_, errno); - } - } - - if (close(fd_) < 0) { - if (s.ok()) { - s = IOError(filename_, errno); - } + Status result; + if (fclose(file_) != 0) { + result = IOError(filename_, errno); } - - fd_ = -1; - base_ = NULL; - limit_ = NULL; - return s; + file_ = NULL; + return result; } virtual Status Flush() { + if (fflush_unlocked(file_) != 0) { + return IOError(filename_, errno); + } return Status::OK(); } @@ -352,26 +248,10 @@ class PosixMmapFile : public WritableFile { if (!s.ok()) { return s; } - - if (pending_sync_) { - // Some unmapped data was not synced - pending_sync_ = false; - if (fdatasync(fd_) < 0) { - s = IOError(filename_, errno); - } + if (fflush_unlocked(file_) != 0 || + fdatasync(fileno(file_)) != 0) { + s = Status::IOError(filename_, strerror(errno)); } - - if (dst_ > last_sync_) { - // Find the beginnings of the pages that contain the first and last - // bytes to be synced. - size_t p1 = TruncateToPageBoundary(last_sync_ - base_); - size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1); - last_sync_ = dst_; - if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) { - s = IOError(filename_, errno); - } - } - return s; } }; @@ -462,12 +342,12 @@ class PosixEnv : public Env { virtual Status NewWritableFile(const std::string& fname, WritableFile** result) { Status s; - const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); - if (fd < 0) { + FILE* f = fopen(fname.c_str(), "w"); + if (f == NULL) { *result = NULL; s = IOError(fname, errno); } else { - *result = new PosixMmapFile(fname, fd, page_size_); + *result = new PosixWritableFile(fname, f); } return s; } @@ -630,7 +510,6 @@ class PosixEnv : public Env { return NULL; } - size_t page_size_; pthread_mutex_t mu_; pthread_cond_t bgsignal_; pthread_t bgthread_; @@ -645,8 +524,7 @@ class PosixEnv : public Env { MmapLimiter mmap_limit_; }; -PosixEnv::PosixEnv() : page_size_(getpagesize()), - started_bgthread_(false) { +PosixEnv::PosixEnv() : started_bgthread_(false) { PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL)); PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL)); } diff --git a/util/testharness.cc b/util/testharness.cc index eb1bdd5..402fab3 100644 --- a/util/testharness.cc +++ b/util/testharness.cc @@ -38,7 +38,7 @@ int RunAllTests() { int num = 0; if (tests != NULL) { - for (int i = 0; i < tests->size(); i++) { + for (size_t i = 0; i < tests->size(); i++) { const Test& t = (*tests)[i]; if (matcher != NULL) { std::string name = t.base; diff --git a/util/testutil.cc b/util/testutil.cc index 538d095..bee56bf 100644 --- a/util/testutil.cc +++ b/util/testutil.cc @@ -32,7 +32,7 @@ std::string RandomKey(Random* rnd, int len) { extern Slice CompressibleString(Random* rnd, double compressed_fraction, - int len, std::string* dst) { + size_t len, std::string* dst) { int raw = static_cast<int>(len * compressed_fraction); if (raw < 1) raw = 1; std::string raw_data; diff --git a/util/testutil.h b/util/testutil.h index 824e655..adad3fc 100644 --- a/util/testutil.h +++ b/util/testutil.h @@ -24,7 +24,7 @@ extern std::string RandomKey(Random* rnd, int len); // "N*compressed_fraction" bytes and return a Slice that references // the generated data. extern Slice CompressibleString(Random* rnd, double compressed_fraction, - int len, std::string* dst); + size_t len, std::string* dst); // A wrapper that allows injection of errors. class ErrorEnv : public EnvWrapper { |