diff options
Diffstat (limited to 'src/mongo/db/storage')
28 files changed, 2706 insertions, 27 deletions
diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript index 6dada1ec2c8..83d21072778 100644 --- a/src/mongo/db/storage/SConscript +++ b/src/mongo/db/storage/SConscript @@ -49,6 +49,15 @@ env.Library( ) env.Library( + target='paths', + source=[ + 'paths.cpp', + ], + LIBDEPS=[ + ], +) + +env.Library( target='sorted_data_interface_test_harness', source=[ 'sorted_data_interface_test_bulkbuilder.cpp', @@ -104,7 +113,7 @@ env.Library( 'storage_engine_lock_file_${TARGET_OS_FAMILY}.cpp', ], LIBDEPS=[ - '$BUILD_DIR/mongo/util/paths', + 'paths', ] ) diff --git a/src/mongo/db/storage/mmap_v1/SConscript b/src/mongo/db/storage/mmap_v1/SConscript index c232cb43ce0..c7cbbd07e2c 100644 --- a/src/mongo/db/storage/mmap_v1/SConscript +++ b/src/mongo/db/storage/mmap_v1/SConscript @@ -33,13 +33,26 @@ env.Library( 'record_store_v1', 'record_access_tracker', 'btree', - '$BUILD_DIR/mongo/util/logfile', - '$BUILD_DIR/mongo/util/compress', - '$BUILD_DIR/mongo/util/file_allocator', - '$BUILD_DIR/mongo/util/paths', + 'file_allocator', + 'logfile', + 'compress', + '$BUILD_DIR/mongo/db/storage/paths', ] ) +compressEnv = env.Clone() +compressEnv.InjectThirdPartyIncludePaths(libraries=['snappy']) +compressEnv +compressEnv.Library( + target='compress', + source=[ + 'compress.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/storage/paths', + ], +) + env.Library( target= 'extent', source= [ @@ -53,6 +66,38 @@ env.Library( ) env.Library( + target='file_allocator', + source=[ + 'file_allocator.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/storage/paths', + ], +) + +env.Library( + target='logfile', + source=[ + 'logfile.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/storage/paths', + ], +) + +env.Library( + target='mmap', + source=[ + 'mmap.cpp', + 'mmap_${TARGET_OS_FAMILY}.cpp', + ], + LIBDEPS=[ + 'file_allocator', + '$BUILD_DIR/mongo/util/foundation', + ], +) + +env.Library( target= 'record_store_v1', source= [ 'record_store_v1_base.cpp', @@ -162,7 +207,7 @@ env.Library( ], 
LIBDEPS= [ 'btree', - '$BUILD_DIR/mongo/db/storage/mmap_v1/record_store_v1_test_help', + 'record_store_v1_test_help', ] ) diff --git a/src/mongo/db/storage/mmap_v1/compress.cpp b/src/mongo/db/storage/mmap_v1/compress.cpp new file mode 100644 index 00000000000..bae8bc5acba --- /dev/null +++ b/src/mongo/db/storage/mmap_v1/compress.cpp @@ -0,0 +1,59 @@ +// @file compress.cpp + +/** +* Copyright (C) 2012 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects +* for all of the code used other than as permitted herein. If you modify +* file(s) with this exception, you may extend this exception to your +* version of the file(s), but you are not obligated to do so. If you do not +* wish to do so, delete this exception statement from your version. If you +* delete this exception statement from all source files in the program, +* then also delete it in the license file. 
+*/ + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/mmap_v1/compress.h" + +#include <snappy.h> + +namespace mongo { + + void rawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length) + { + snappy::RawCompress(input, input_length, compressed, compressed_length); + } + + size_t maxCompressedLength(size_t source_len) { + return snappy::MaxCompressedLength(source_len); + } + + size_t compress(const char* input, size_t input_length, std::string* output) { + return snappy::Compress(input, input_length, output); + } + + bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed) { + return snappy::Uncompress(compressed, compressed_length, uncompressed); + } + +} diff --git a/src/mongo/db/storage/mmap_v1/compress.h b/src/mongo/db/storage/mmap_v1/compress.h new file mode 100644 index 00000000000..b8afa4d90c5 --- /dev/null +++ b/src/mongo/db/storage/mmap_v1/compress.h @@ -0,0 +1,49 @@ +// @file compress.h + +/** +* Copyright (C) 2012 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. 
You +* must comply with the GNU Affero General Public License in all respects +* for all of the code used other than as permitted herein. If you modify +* file(s) with this exception, you may extend this exception to your +* version of the file(s), but you are not obligated to do so. If you do not +* wish to do so, delete this exception statement from your version. If you +* delete this exception statement from all source files in the program, +* then also delete it in the license file. +*/ + +#pragma once + +#include <string> + +namespace mongo { + + size_t compress(const char* input, size_t input_length, std::string* output); + + bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed); + + size_t maxCompressedLength(size_t source_len); + void rawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length); + +} + + diff --git a/src/mongo/db/storage/mmap_v1/data_file.cpp b/src/mongo/db/storage/mmap_v1/data_file.cpp index fcd604d3206..04dda4902ef 100644 --- a/src/mongo/db/storage/mmap_v1/data_file.cpp +++ b/src/mongo/db/storage/mmap_v1/data_file.cpp @@ -42,7 +42,7 @@ #include "mongo/db/storage/mmap_v1/durable_mapped_file.h" #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" #include "mongo/db/operation_context.h" -#include "mongo/util/file_allocator.h" +#include "mongo/db/storage/mmap_v1/file_allocator.h" #include "mongo/util/log.h" namespace mongo { diff --git a/src/mongo/db/storage/mmap_v1/data_file_sync.cpp b/src/mongo/db/storage/mmap_v1/data_file_sync.cpp index cf1d1abca88..b65124052d7 100644 --- a/src/mongo/db/storage/mmap_v1/data_file_sync.cpp +++ b/src/mongo/db/storage/mmap_v1/data_file_sync.cpp @@ -35,11 +35,11 @@ #include "mongo/db/commands/server_status_metric.h" #include "mongo/db/service_context.h" #include "mongo/db/instance.h" +#include "mongo/db/storage/mmap_v1/mmap.h" #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" #include "mongo/db/storage_options.h" #include 
"mongo/util/exit.h" #include "mongo/util/log.h" -#include "mongo/util/mmap.h" namespace mongo { diff --git a/src/mongo/db/storage/mmap_v1/dur_journal.cpp b/src/mongo/db/storage/mmap_v1/dur_journal.cpp index 50cb5ee3d22..1a08dc3c01c 100644 --- a/src/mongo/db/storage/mmap_v1/dur_journal.cpp +++ b/src/mongo/db/storage/mmap_v1/dur_journal.cpp @@ -41,24 +41,24 @@ #include "mongo/base/init.h" #include "mongo/config.h" #include "mongo/db/client.h" +#include "mongo/db/storage/mmap_v1/mmap.h" #include "mongo/db/storage/mmap_v1/aligned_builder.h" +#include "mongo/db/storage/mmap_v1/compress.h" #include "mongo/db/storage/mmap_v1/dur_journalformat.h" #include "mongo/db/storage/mmap_v1/dur_journalimpl.h" #include "mongo/db/storage/mmap_v1/dur_stats.h" +#include "mongo/db/storage/mmap_v1/logfile.h" #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" +#include "mongo/db/storage/paths.h" #include "mongo/db/storage_options.h" #include "mongo/platform/random.h" #include "mongo/util/checksum.h" -#include "mongo/util/compress.h" #include "mongo/util/exit.h" #include "mongo/util/file.h" #include "mongo/util/hex.h" #include "mongo/util/log.h" -#include "mongo/util/logfile.h" -#include "mongo/util/mmap.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/net/listen.h" // getelapsedtimemillis -#include "mongo/util/paths.h" #include "mongo/util/progress_meter.h" #include "mongo/util/timer.h" diff --git a/src/mongo/db/storage/mmap_v1/dur_journalimpl.h b/src/mongo/db/storage/mmap_v1/dur_journalimpl.h index 04aca13e69d..7664e1f5265 100644 --- a/src/mongo/db/storage/mmap_v1/dur_journalimpl.h +++ b/src/mongo/db/storage/mmap_v1/dur_journalimpl.h @@ -31,7 +31,7 @@ #pragma once #include "mongo/db/storage/mmap_v1/dur_journalformat.h" -#include "mongo/util/logfile.h" +#include "mongo/db/storage/mmap_v1/logfile.h" namespace mongo { namespace dur { diff --git a/src/mongo/db/storage/mmap_v1/dur_recover.cpp b/src/mongo/db/storage/mmap_v1/dur_recover.cpp index b4e0a6b3bd9..b6d997d4025 100644 
--- a/src/mongo/db/storage/mmap_v1/dur_recover.cpp +++ b/src/mongo/db/storage/mmap_v1/dur_recover.cpp @@ -40,6 +40,7 @@ #include <sys/stat.h> #include "mongo/db/operation_context_impl.h" +#include "mongo/db/storage/mmap_v1/compress.h" #include "mongo/db/storage/mmap_v1/dur_commitjob.h" #include "mongo/db/storage/mmap_v1/dur_journal.h" #include "mongo/db/storage/mmap_v1/dur_journalformat.h" @@ -49,7 +50,6 @@ #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" #include "mongo/util/bufreader.h" #include "mongo/util/checksum.h" -#include "mongo/util/compress.h" #include "mongo/util/exit.h" #include "mongo/util/hex.h" #include "mongo/util/log.h" diff --git a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h index 53a211f609e..0ebbd9dd7c9 100644 --- a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h +++ b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h @@ -31,8 +31,8 @@ #pragma once -#include "mongo/util/mmap.h" -#include "mongo/util/paths.h" +#include "mongo/db/storage/mmap_v1/mmap.h" +#include "mongo/db/storage/paths.h" namespace mongo { diff --git a/src/mongo/db/storage/mmap_v1/durop.cpp b/src/mongo/db/storage/mmap_v1/durop.cpp index aa70b169f32..ee91e0c60aa 100644 --- a/src/mongo/db/storage/mmap_v1/durop.cpp +++ b/src/mongo/db/storage/mmap_v1/durop.cpp @@ -42,7 +42,7 @@ #include "mongo/db/storage/mmap_v1/durable_mapped_file.h" #include "mongo/db/storage/mmap_v1/mmap_v1_engine.h" #include "mongo/util/file.h" -#include "mongo/util/file_allocator.h" +#include "mongo/db/storage/mmap_v1/file_allocator.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" diff --git a/src/mongo/db/storage/mmap_v1/durop.h b/src/mongo/db/storage/mmap_v1/durop.h index ee31780896f..e7da38a32d1 100644 --- a/src/mongo/db/storage/mmap_v1/durop.h +++ b/src/mongo/db/storage/mmap_v1/durop.h @@ -33,8 +33,8 @@ #include <boost/shared_ptr.hpp> #include "mongo/db/storage/mmap_v1/dur_journalformat.h" +#include 
"mongo/db/storage/paths.h" #include "mongo/util/bufreader.h" -#include "mongo/util/paths.h" namespace mongo { diff --git a/src/mongo/db/storage/mmap_v1/file_allocator.cpp b/src/mongo/db/storage/mmap_v1/file_allocator.cpp new file mode 100644 index 00000000000..d0bd764d25a --- /dev/null +++ b/src/mongo/db/storage/mmap_v1/file_allocator.cpp @@ -0,0 +1,472 @@ +// @file file_allocator.cpp + +/* Copyright 2009 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/mmap_v1/file_allocator.h" + +#include <boost/thread.hpp> +#include <boost/filesystem/operations.hpp> +#include <errno.h> +#include <fcntl.h> + +#if defined(__FreeBSD__) +# include <sys/param.h> +# include <sys/mount.h> +#endif + +#if defined(__linux__) +# include <sys/vfs.h> +#endif + +#if defined(_WIN32) +# include <io.h> +#endif + +#include "mongo/db/storage/paths.h" +#include "mongo/platform/posix_fadvise.h" +#include "mongo/stdx/functional.h" +#include "mongo/util/concurrency/thread_name.h" +#include "mongo/util/fail_point.h" +#include "mongo/util/fail_point_service.h" +#include "mongo/util/log.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/processinfo.h" +#include "mongo/util/time_support.h" +#include "mongo/util/timer.h" + +using namespace mongoutils; + +#ifndef O_NOATIME +#define O_NOATIME (0) +#endif + +namespace mongo { + + using std::endl; + using std::list; + using std::string; + using std::stringstream; + + // unique number for temporary file names + unsigned long long FileAllocator::_uniqueNumber = 0; + static SimpleMutex _uniqueNumberMutex( "uniqueNumberMutex" ); + + MONGO_FP_DECLARE(allocateDiskFull); + + /** + * Aliases for Win32 CRT functions + */ +#if defined(_WIN32) + static inline long lseek(int fd, long offset, int origin) { return _lseek(fd, offset, origin); } + static inline int write(int fd, const void *data, int count) { return _write(fd, data, count); } + static inline int close(int fd) { return _close(fd); } + + typedef BOOL (CALLBACK *GetVolumeInformationByHandleWPtr)(HANDLE, LPWSTR, DWORD, LPDWORD, LPDWORD, LPDWORD, LPWSTR, DWORD); + GetVolumeInformationByHandleWPtr GetVolumeInformationByHandleWFunc; + + MONGO_INITIALIZER(InitGetVolumeInformationByHandleW)(InitializerContext *context) { + HMODULE kernelLib = LoadLibraryA("kernel32.dll"); + if (kernelLib) { + 
GetVolumeInformationByHandleWFunc = reinterpret_cast<GetVolumeInformationByHandleWPtr> + (GetProcAddress(kernelLib, "GetVolumeInformationByHandleW")); + } + return Status::OK(); + } +#endif + + boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p){ + const boost::filesystem::path parent = p.branch_path(); + + if (! boost::filesystem::exists(parent)){ + ensureParentDirCreated(parent); + log() << "creating directory " << parent.string() << endl; + boost::filesystem::create_directory(parent); + flushMyDirectory(parent); // flushes grandparent to ensure parent exists after crash + } + + verify(boost::filesystem::is_directory(parent)); + return parent; + } + + FileAllocator::FileAllocator() : _failed() {} + + + void FileAllocator::start() { + boost::thread t( stdx::bind( &FileAllocator::run , this ) ); + } + + void FileAllocator::requestAllocation( const string &name, long &size ) { + boost::lock_guard<boost::mutex> lk( _pendingMutex ); + if ( _failed ) + return; + long oldSize = prevSize( name ); + if ( oldSize != -1 ) { + size = oldSize; + return; + } + _pending.push_back( name ); + _pendingSize[ name ] = size; + _pendingUpdated.notify_all(); + } + + void FileAllocator::allocateAsap( const string &name, unsigned long long &size ) { + boost::unique_lock<boost::mutex> lk( _pendingMutex ); + + // In case the allocator is in failed state, check once before starting so that subsequent + // requests for the same database would fail fast after the first one has failed. 
+ checkFailure(); + + long oldSize = prevSize( name ); + if ( oldSize != -1 ) { + size = oldSize; + if ( !inProgress( name ) ) + return; + } + checkFailure(); + _pendingSize[ name ] = size; + if ( _pending.size() == 0 ) + _pending.push_back( name ); + else if ( _pending.front() != name ) { + _pending.remove( name ); + list< string >::iterator i = _pending.begin(); + ++i; + _pending.insert( i, name ); + } + _pendingUpdated.notify_all(); + while( inProgress( name ) ) { + checkFailure(); + _pendingUpdated.wait(lk); + } + + } + + void FileAllocator::waitUntilFinished() const { + if ( _failed ) + return; + boost::unique_lock<boost::mutex> lk( _pendingMutex ); + while( _pending.size() != 0 ) + _pendingUpdated.wait(lk); + } + + // TODO: pull this out to per-OS files once they exist + static bool useSparseFiles(int fd) { + +#if defined(__linux__) || defined(__FreeBSD__) + struct statfs fs_stats; + int ret = fstatfs(fd, &fs_stats); + uassert(16062, "fstatfs failed: " + errnoWithDescription(), ret == 0); +#endif + +#if defined(__linux__) +// these are from <linux/magic.h> but that isn't available on all systems +# define NFS_SUPER_MAGIC 0x6969 +# define TMPFS_MAGIC 0x01021994 + + return (fs_stats.f_type == NFS_SUPER_MAGIC) + || (fs_stats.f_type == TMPFS_MAGIC) + ; + +#elif defined(__FreeBSD__) + + return (str::equals(fs_stats.f_fstypename, "zfs") || + str::equals(fs_stats.f_fstypename, "nfs") || + str::equals(fs_stats.f_fstypename, "oldnfs")); + +#elif defined(__sun) + // assume using ZFS which is copy-on-write so no benefit to zero-filling + // TODO: check which fs we are using like we do elsewhere + return true; +#else + return false; +#endif + } + +#if defined(_WIN32) + static bool isFileOnNTFSVolume(int fd) { // true only if the volume behind fd reports the file system name "NTFS"; false on any lookup failure + if (!GetVolumeInformationByHandleWFunc) { + warning() << "Could not retrieve pointer to GetVolumeInformationByHandleW function"; + return false; + } + + HANDLE fileHandle = (HANDLE)_get_osfhandle(fd); + if (fileHandle == INVALID_HANDLE_VALUE) { + warning() << 
"_get_osfhandle() failed with " << _strerror(NULL); + return false; + } + + WCHAR fileSystemName[MAX_PATH + 1]; + if (!GetVolumeInformationByHandleWFunc(fileHandle, NULL, 0, NULL, 0, NULL, fileSystemName, sizeof(fileSystemName) / sizeof(fileSystemName[0]))) { // the API takes the buffer length in WCHARs, not bytes + DWORD gle = GetLastError(); + warning() << "GetVolumeInformationByHandleW failed with " << errnoWithDescription(gle); + return false; + } + + return lstrcmpW(fileSystemName, L"NTFS") == 0; + } +#endif + + void FileAllocator::ensureLength(int fd , long size) { + // Test running out of disk scenarios + if (MONGO_FAIL_POINT(allocateDiskFull)) { + uasserted( 10444 , "File allocation failed due to failpoint."); + } + +#if !defined(_WIN32) + if (useSparseFiles(fd)) { + LOG(1) << "using ftruncate to create a sparse file" << endl; + int ret = ftruncate(fd, size); + uassert(16063, "ftruncate failed: " + errnoWithDescription(), ret == 0); + return; + } +#endif + +#if defined(__linux__) + int ret = posix_fallocate(fd,0,size); + if ( ret == 0 ) + return; + + log() << "FileAllocator: posix_fallocate failed: " << errnoWithDescription( ret ) << " falling back" << endl; +#endif + + off_t filelen = lseek( fd, 0, SEEK_END ); + if ( filelen < size ) { + if (filelen != 0) { + stringstream ss; + ss << "failure creating new datafile; lseek failed for fd " << fd << " with errno: " << errnoWithDescription(); + uassert( 10440 , ss.str(), filelen == 0 ); + } + // Check for end of disk. + + uassert( 10441 , str::stream() << "Unable to allocate new file of size " << size << ' ' << errnoWithDescription(), + size - 1 == lseek(fd, size - 1, SEEK_SET) ); + uassert( 10442 , str::stream() << "Unable to allocate new file of size " << size << ' ' << errnoWithDescription(), + 1 == write(fd, "", 1) ); + + // File expansion is completed here. Do not do the zeroing out on OS-es where there + // is no risk of triggering allocation-related bugs such as + // http://support.microsoft.com/kb/2731284. 
+ // + if (!ProcessInfo::isDataFileZeroingNeeded()) { + return; + } + +#if defined(_WIN32) + if (!isFileOnNTFSVolume(fd)) { + log() << "No need to zero out datafile on non-NTFS volume" << endl; + return; + } +#endif + + lseek(fd, 0, SEEK_SET); // rewind to the start of the file before zero-filling + + const long z = 256 * 1024; // zero-fill chunk size in bytes (256 KiB) + const boost::scoped_array<char> buf_holder (new char[z]); + char* buf = buf_holder.get(); + memset(buf, 0, z); + long left = size; + while ( left > 0 ) { + long towrite = left; + if ( towrite > z ) + towrite = z; + + int written = write( fd , buf , towrite ); + uassert( 10443 , errnoWithPrefix("FileAllocator: file write failed" ), written > 0 ); + left -= written; + } + } + } + + void FileAllocator::checkFailure() { + if (_failed) { + // we want to log the problem (diskfull.js expects it) but we do not want to dump a stack trace + msgassertedNoTrace( 12520, "new file allocation failure" ); + } + } + + long FileAllocator::prevSize( const string &name ) const { // recorded pending size if any, else size of the existing file, else -1 + if ( _pendingSize.count( name ) > 0 ) + return _pendingSize[ name ]; + if ( boost::filesystem::exists( name ) ) + return boost::filesystem::file_size( name ); + return -1; + } + + // caller must hold _pendingMutex lock. 
+ bool FileAllocator::inProgress( const string &name ) const { + for( list< string >::const_iterator i = _pending.begin(); i != _pending.end(); ++i ) + if ( *i == name ) + return true; + return false; + } + + string FileAllocator::makeTempFileName( boost::filesystem::path root ) { + while( 1 ) { + boost::filesystem::path p = root / "_tmp"; + stringstream ss; + unsigned long long thisUniqueNumber; + { + // increment temporary file name counter + // TODO: SERVER-6055 -- Unify temporary file name selection + SimpleMutex::scoped_lock lk(_uniqueNumberMutex); + thisUniqueNumber = _uniqueNumber; + ++_uniqueNumber; + } + ss << thisUniqueNumber; + p /= ss.str(); + string fn = p.string(); + if( !boost::filesystem::exists(p) ) + return fn; + } + return ""; + } + + void FileAllocator::run( FileAllocator * fa ) { + setThreadName( "FileAllocator" ); + { + // initialize unique temporary file name counter + // TODO: SERVER-6055 -- Unify temporary file name selection + SimpleMutex::scoped_lock lk(_uniqueNumberMutex); + _uniqueNumber = curTimeMicros64(); + } + while( 1 ) { + { + boost::unique_lock<boost::mutex> lk( fa->_pendingMutex ); + if ( fa->_pending.size() == 0 ) + fa->_pendingUpdated.wait(lk); + } + while( 1 ) { + string name; + long size = 0; + { + boost::lock_guard<boost::mutex> lk( fa->_pendingMutex ); + if ( fa->_pending.size() == 0 ) + break; + name = fa->_pending.front(); + size = fa->_pendingSize[ name ]; + } + + string tmp; + long fd = 0; + try { + log() << "allocating new datafile " << name << ", filling with zeroes..." 
<< endl; + + boost::filesystem::path parent = ensureParentDirCreated(name); + tmp = fa->makeTempFileName( parent ); + ensureParentDirCreated(tmp); + +#if defined(_WIN32) + fd = _open( tmp.c_str(), _O_RDWR | _O_CREAT | O_NOATIME, _S_IREAD | _S_IWRITE ); +#else + fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR); +#endif + if ( fd < 0 ) { + log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") " << errnoWithDescription() << endl; + uasserted(10439, ""); + } + +#if defined(POSIX_FADV_DONTNEED) + if( posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED) ) { + log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") " << errnoWithDescription() << endl; + } +#endif + + Timer t; + + /* make sure the file is the full desired length */ + ensureLength( fd , size ); + + close( fd ); + fd = 0; + + if( rename(tmp.c_str(), name.c_str()) ) { + const string& errStr = errnoWithDescription(); + const string& errMessage = str::stream() + << "error: couldn't rename " << tmp + << " to " << name << ' ' << errStr; + msgasserted(13653, errMessage); + } + flushMyDirectory(name); + + log() << "done allocating datafile " << name << ", " + << "size: " << size/1024/1024 << "MB, " + << " took " << ((double)t.millis())/1000.0 << " secs" + << endl; + + // no longer in a failed state. allow new writers. + fa->_failed = false; + } + catch ( const std::exception& e ) { + log() << "error: failed to allocate new file: " << name + << " size: " << size << ' ' << e.what() + << ". will try again in 10 seconds" << endl; + if ( fd > 0 ) + close( fd ); + try { + if ( ! tmp.empty() ) + boost::filesystem::remove( tmp ); + boost::filesystem::remove( name ); + } catch ( const std::exception& e ) { + log() << "error removing files: " << e.what() << endl; + } + + { + boost::lock_guard<boost::mutex> lk(fa->_pendingMutex); + fa->_failed = true; + + // TODO: Should we remove the file from pending? 
+ fa->_pendingUpdated.notify_all(); + } + + + sleepsecs(10); + continue; + } + + { + boost::lock_guard<boost::mutex> lk( fa->_pendingMutex ); + fa->_pendingSize.erase( name ); + fa->_pending.pop_front(); + fa->_pendingUpdated.notify_all(); + } + } + } + } + + FileAllocator* FileAllocator::_instance = 0; + + FileAllocator* FileAllocator::get(){ + if ( ! _instance ) + _instance = new FileAllocator(); + return _instance; + } + +} // namespace mongo diff --git a/src/mongo/db/storage/mmap_v1/file_allocator.h b/src/mongo/db/storage/mmap_v1/file_allocator.h new file mode 100644 index 00000000000..aabe2a368bf --- /dev/null +++ b/src/mongo/db/storage/mmap_v1/file_allocator.h @@ -0,0 +1,109 @@ +// @file file_allocator.h + +/* Copyright 2009 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. 
If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include <list> +#include <boost/filesystem/path.hpp> +#include <boost/noncopyable.hpp> +#include <boost/thread/condition.hpp> + +#include "mongo/util/concurrency/mutex.h" + +namespace mongo { + + /* + * Handles allocation of contiguous files on disk. Allocation may be + * requested asynchronously or synchronously. + * singleton + */ + class FileAllocator : boost::noncopyable { + /* + * The public functions may not be called concurrently. The allocation + * functions may be called multiple times per file, but only the first + * size specified per file will be used. + */ + public: + void start(); + + /** + * May be called if file exists. If file exists, or its allocation has + * been requested, size is updated to match existing file size. + */ + void requestAllocation( const std::string &name, long &size ); + + + /** + * Returns when file has been allocated. If file exists, size is + * updated to match existing file size. + */ + void allocateAsap( const std::string &name, unsigned long long &size ); + + void waitUntilFinished() const; + + static void ensureLength(int fd, long size); + + /** @return the singleton */ + static FileAllocator * get(); + + private: + + FileAllocator(); + + void checkFailure(); + + // caller must hold _pendingMutex lock. Returns size if allocated or + // allocation requested, -1 otherwise. + long prevSize( const std::string &name ) const; + + // caller must hold _pendingMutex lock. 
+ bool inProgress( const std::string &name ) const; + + /** called from the worker thread */ + static void run( FileAllocator * fa ); + + // generate a unique name for temporary files + std::string makeTempFileName( boost::filesystem::path root ); + + mutable mongo::mutex _pendingMutex; + mutable boost::condition _pendingUpdated; + + std::list< std::string > _pending; + mutable std::map< std::string, long > _pendingSize; + + // unique number for temporary files + static unsigned long long _uniqueNumber; + + bool _failed; + + static FileAllocator* _instance; + + }; + +} // namespace mongo diff --git a/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp b/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp index bbabd11c179..8c29741ed7e 100644 --- a/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp +++ b/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp @@ -46,10 +46,10 @@ #include "mongo/db/query/internal_plans.h" #include "mongo/db/storage_options.h" #include "mongo/db/storage/mmap_v1/aligned_builder.h" +#include "mongo/db/storage/mmap_v1/logfile.h" +#include "mongo/db/storage/paths.h" #include "mongo/scripting/engine.h" #include "mongo/util/background.h" -#include "mongo/util/logfile.h" -#include "mongo/util/paths.h" #include "mongo/util/timer.h" namespace mongo { diff --git a/src/mongo/db/storage/mmap_v1/logfile.cpp b/src/mongo/db/storage/mmap_v1/logfile.cpp new file mode 100644 index 00000000000..8aa5e32626f --- /dev/null +++ b/src/mongo/db/storage/mmap_v1/logfile.cpp @@ -0,0 +1,270 @@ +// @file logfile.cpp simple file log writing / journaling + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects +* for all of the code used other than as permitted herein. If you modify +* file(s) with this exception, you may extend this exception to your +* version of the file(s), but you are not obligated to do so. If you do not +* wish to do so, delete this exception statement from your version. If you +* delete this exception statement from all source files in the program, +* then also delete it in the license file. 
+*/ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/mmap_v1/logfile.h" + +#include "mongo/db/storage/mmap_v1/mmap.h" +#include "mongo/db/storage/paths.h" +#include "mongo/platform/posix_fadvise.h" +#include "mongo/util/allocator.h" +#include "mongo/util/log.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/startup_test.h" +#include "mongo/util/text.h" + + +using namespace mongoutils; + +using std::endl; +using std::string; + +#if defined(_WIN32) + +namespace mongo { + + LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) { + _fd = CreateFile( + toNativeString(name.c_str()).c_str(), + (readwrite?GENERIC_READ:0)|GENERIC_WRITE, + FILE_SHARE_READ, + NULL, + OPEN_ALWAYS, + FILE_FLAG_NO_BUFFERING, + NULL); + if( _fd == INVALID_HANDLE_VALUE ) { + DWORD e = GetLastError(); + uasserted(13518, str::stream() << "couldn't open file " << name << " for writing " << errnoWithDescription(e)); + } + SetFilePointer(_fd, 0, 0, FILE_BEGIN); + } + + LogFile::~LogFile() { + if( _fd != INVALID_HANDLE_VALUE ) + CloseHandle(_fd); + } + + void LogFile::truncate() { + verify(_fd != INVALID_HANDLE_VALUE); + + if (!SetEndOfFile(_fd)){ + msgasserted(15871, "Couldn't truncate file: " + errnoWithDescription()); + } + } + + void LogFile::writeAt(unsigned long long offset, const void *_buf, size_t _len) { +// TODO 64 bit offsets + OVERLAPPED o; + memset(&o,0,sizeof(o)); + (unsigned long long&) o.Offset = offset; + BOOL ok= WriteFile(_fd, _buf, _len, 0, &o); + verify(ok); + } + + void LogFile::readAt(unsigned long long offset, void *_buf, size_t _len) { +// TODO 64 bit offsets + OVERLAPPED o; + memset(&o,0,sizeof(o)); + (unsigned long long&) o.Offset = offset; + DWORD nr; + BOOL ok = ReadFile(_fd, _buf, _len, &nr, &o); + if( !ok ) { + string e = errnoWithDescription(); + //DWORD e = GetLastError(); + log() << "LogFile readAt(" << offset << ") len:" << _len << "errno:" 
<< e << endl; + verify(false); + } + } + + void LogFile::synchronousAppend(const void *_buf, size_t _len) { + const size_t BlockSize = 8 * 1024 * 1024; + verify(_fd); + verify(_len % g_minOSPageSizeBytes == 0); + const char *buf = (const char *) _buf; + size_t left = _len; + while( left ) { + size_t toWrite = std::min(left, BlockSize); + DWORD written; + if( !WriteFile(_fd, buf, toWrite, &written, NULL) ) { + DWORD e = GetLastError(); + if( e == 87 ) + msgasserted(13519, "error 87 appending to file - invalid parameter"); + else + uasserted(13517, str::stream() << "error appending to file " << _name << ' ' << _len << ' ' << toWrite << ' ' << errnoWithDescription(e)); + } + else { + dassert( written == toWrite ); + } + left -= written; + buf += written; + } + } + +} + +#else + +/// posix + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/ioctl.h> + +#ifdef __linux__ +#include <linux/fs.h> +#endif + +namespace mongo { + + LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) { + int options = O_CREAT + | (readwrite?O_RDWR:O_WRONLY) +#if defined(O_DIRECT) + | O_DIRECT +#endif +#if defined(O_NOATIME) + | O_NOATIME +#endif + ; + + _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR); + _blkSize = g_minOSPageSizeBytes; + +#if defined(O_DIRECT) + _direct = true; + if( _fd < 0 ) { + _direct = false; + options &= ~O_DIRECT; + _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR); + } +#ifdef __linux__ + ssize_t tmpBlkSize = ioctl(_fd, BLKBSZGET); + // TODO: We need some sanity checking on tmpBlkSize even if ioctl() did not fail. 
+ if (tmpBlkSize > 0) { + _blkSize = (size_t)tmpBlkSize; + } +#endif +#else + _direct = false; +#endif + + if( _fd < 0 ) { + uasserted(13516, str::stream() << "couldn't open file " << name << " for writing " << errnoWithDescription()); + } + + flushMyDirectory(name); + } + + LogFile::~LogFile() { + if( _fd >= 0 ) + close(_fd); + _fd = -1; + } + + void LogFile::truncate() { + verify(_fd >= 0); + + BOOST_STATIC_ASSERT(sizeof(off_t) == 8); // we don't want overflow here + const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek + if (ftruncate(_fd, pos) != 0){ + msgasserted(15873, "Couldn't truncate file: " + errnoWithDescription()); + } + + fsync(_fd); + } + + void LogFile::writeAt(unsigned long long offset, const void *buf, size_t len) { + verify(((size_t)buf) % g_minOSPageSizeBytes == 0); // aligned + ssize_t written = pwrite(_fd, buf, len, offset); + if( written != (ssize_t) len ) { + log() << "writeAt fails " << errnoWithDescription() << endl; + } +#if defined(__linux__) + fdatasync(_fd); +#else + fsync(_fd); +#endif + } + + void LogFile::readAt(unsigned long long offset, void *_buf, size_t _len) { + verify(((size_t)_buf) % g_minOSPageSizeBytes == 0); // aligned + ssize_t rd = pread(_fd, _buf, _len, offset); + verify( rd != -1 ); + } + + void LogFile::synchronousAppend(const void *b, size_t len) { + + const char *buf = static_cast<const char *>( b ); + ssize_t charsToWrite = static_cast<ssize_t>( len ); + + fassert( 16144, charsToWrite >= 0 ); + fassert( 16142, _fd >= 0 ); + fassert( 16143, reinterpret_cast<size_t>( buf ) % _blkSize == 0 ); // aligned + +#ifdef POSIX_FADV_DONTNEED + const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek, just get current position +#endif + + while ( charsToWrite > 0 ) { + const ssize_t written = write( _fd, buf, static_cast<size_t>( charsToWrite ) ); + if ( -1 == written ) { + log() << "LogFile::synchronousAppend failed with " << charsToWrite + << " bytes unwritten out of " << len << " bytes; b=" << b << ' 
' + << errnoWithDescription() << std::endl; + fassertFailed( 13515 ); + } + buf += written; + charsToWrite -= written; + } + + if( +#if defined(__linux__) + fdatasync(_fd) < 0 +#else + fsync(_fd) +#endif + ) { + log() << "error appending to file on fsync " << ' ' << errnoWithDescription(); + fassertFailed( 13514 ); + } + +#ifdef POSIX_FADV_DONTNEED + if (!_direct) + posix_fadvise(_fd, pos, len, POSIX_FADV_DONTNEED); +#endif + } + +} + +#endif diff --git a/src/mongo/db/storage/mmap_v1/logfile.h b/src/mongo/db/storage/mmap_v1/logfile.h new file mode 100644 index 00000000000..278b9c162aa --- /dev/null +++ b/src/mongo/db/storage/mmap_v1/logfile.h @@ -0,0 +1,77 @@ +// @file logfile.h simple file log writing / journaling + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects +* for all of the code used other than as permitted herein. If you modify +* file(s) with this exception, you may extend this exception to your +* version of the file(s), but you are not obligated to do so. 
/**
 * Handle on a file used for log / journal writing. The member functions are implemented
 * per platform in logfile.cpp.
 */
class LogFile {
public:
    /** create the file and open.
        @param name      path of the file
        @param readwrite when true the file is opened for reading as well as writing
        throws UserAssertion on i/o error
        NOTE(review): the original comment required that the file "must not already exist",
        but the implementations open with O_CREAT (POSIX) / OPEN_ALWAYS (Windows) and no
        exclusivity flag, so this is a caller convention and is not enforced here.
    */
    LogFile(const std::string& name, bool readwrite = false);

    /** closes */
    ~LogFile();

    /** append to file.  does not return until sync'd.  uses direct i/o when possible.
        note direct i/o may have alignment requirements
        NOTE(review): the original comment said this throws UserAssertion on an i/o error;
        the POSIX implementation reports failures through fassertFailed() instead, so
        callers should not rely on catching an exception on every platform.
    */
    void synchronousAppend(const void *buf, size_t len);

    /** write at specified offset. must be aligned.  does not return until physically
        written.  thread safe
    */
    void writeAt(unsigned long long offset, const void *_buf, size_t _len);

    /** read _len bytes starting at the specified offset into _buf. */
    void readAt(unsigned long long offset, void *_buf, size_t _len);

    // path this object was constructed with
    const std::string _name;

    void truncate(); // Removes extra data after current position

private:
#if defined(_WIN32)
    typedef HANDLE fd_type;
#else
    typedef int fd_type;
#endif
    fd_type _fd;
    bool _direct; // are we using direct I/O

    // Block size, in case of direct I/O we need to test alignment against the page size,
    // which can be different than 4kB.
    size_t _blkSize;
};
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/mmap_v1/mmap.h" + +#include <boost/filesystem/operations.hpp> + +#include "mongo/base/owned_pointer_vector.h" +#include "mongo/util/concurrency/rwlock.h" +#include "mongo/util/log.h" +#include "mongo/util/map_util.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/processinfo.h" +#include "mongo/util/progress_meter.h" +#include "mongo/util/startup_test.h" + +namespace mongo { + + using std::endl; + using std::map; + using std::set; + using std::string; + using std::stringstream; + using std::vector; + + void minOSPageSizeBytesTest(size_t minOSPageSizeBytes) { + fassert( 16325, minOSPageSizeBytes > 0 ); + fassert( 16326, minOSPageSizeBytes < 1000000 ); + // check to see if the page size is a power of 2 + fassert( 16327, (minOSPageSizeBytes & (minOSPageSizeBytes - 1)) == 0); + } + +namespace { + set<MongoFile*> mmfiles; + map<string,MongoFile*> pathToFile; +} // namespace + + /* Create. Must not exist. + @param zero fill file with zeros when true + */ + void* MemoryMappedFile::create(const std::string& filename, unsigned long long len, bool zero) { + uassert( 13468, string("can't create file already exists ") + filename, ! boost::filesystem::exists(filename) ); + void *p = map(filename.c_str(), len); + if( p && zero ) { + size_t sz = (size_t) len; + verify( len == sz ); + memset(p, 0, sz); + } + return p; + } + + /*static*/ void MemoryMappedFile::updateLength( const char *filename, unsigned long long &length ) { + if ( !boost::filesystem::exists( filename ) ) + return; + // make sure we map full length if preexisting file. 
+ boost::uintmax_t l = boost::filesystem::file_size( filename ); + length = l; + } + + void* MemoryMappedFile::map(const char *filename) { + unsigned long long l; + try { + l = boost::filesystem::file_size( filename ); + } + catch(boost::filesystem::filesystem_error& e) { + uasserted(15922, mongoutils::str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << e.what() ); + } + return map( filename , l ); + } + void* MemoryMappedFile::mapWithOptions(const char *filename, int options) { + unsigned long long l; + try { + l = boost::filesystem::file_size( filename ); + } + catch(boost::filesystem::filesystem_error& e) { + uasserted(15923, mongoutils::str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << e.what() ); + } + return map( filename , l, options ); + } + + /* --- MongoFile ------------------------------------------------- + this is the administrative stuff + */ + + RWLockRecursiveNongreedy LockMongoFilesShared::mmmutex("mmmutex",10*60*1000 /* 10 minutes */); + unsigned LockMongoFilesShared::era = 99; // note this rolls over + + set<MongoFile*>& MongoFile::getAllFiles() { return mmfiles; } + + /* subclass must call in destructor (or at close). 
+ removes this from pathToFile and other maps + safe to call more than once, albeit might be wasted work + ideal to call close to the close, if the close is well before object destruction + */ + void MongoFile::destroyed() { + LockMongoFilesShared::assertExclusivelyLocked(); + mmfiles.erase(this); + pathToFile.erase( filename() ); + } + + /*static*/ + void MongoFile::closeAllFiles( stringstream &message ) { + static int closingAllFiles = 0; + if ( closingAllFiles ) { + message << "warning closingAllFiles=" << closingAllFiles << endl; + return; + } + ++closingAllFiles; + + LockMongoFilesExclusive lk; + + ProgressMeter pm(mmfiles.size(), 2, 1, "files", "File Closing Progress"); + set<MongoFile*> temp = mmfiles; + for ( set<MongoFile*>::iterator i = temp.begin(); i != temp.end(); i++ ) { + (*i)->close(); // close() now removes from mmfiles + pm.hit(); + } + message << "closeAllFiles() finished"; + --closingAllFiles; + } + + /*static*/ long long MongoFile::totalMappedLength() { + unsigned long long total = 0; + + LockMongoFilesShared lk; + + for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ) + total += (*i)->length(); + + return total; + } + + void nullFunc() { } + + // callback notifications + void (*MongoFile::notifyPreFlush)() = nullFunc; + void (*MongoFile::notifyPostFlush)() = nullFunc; + + /*static*/ int MongoFile::flushAll( bool sync ) { + if ( sync ) notifyPreFlush(); + int x = _flushAll(sync); + if ( sync ) notifyPostFlush(); + return x; + } + + /*static*/ int MongoFile::_flushAll( bool sync ) { + if ( ! sync ) { + int num = 0; + LockMongoFilesShared lk; + for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ) { + num++; + MongoFile * mmf = *i; + if ( ! 
mmf ) + continue; + + mmf->flush( sync ); + } + return num; + } + + // want to do it sync + + // get a thread-safe Flushable object for each file first in a single lock + // so that we can iterate and flush without doing any locking here + OwnedPointerVector<Flushable> thingsToFlushWrapper; + vector<Flushable*>& thingsToFlush = thingsToFlushWrapper.mutableVector(); + { + LockMongoFilesShared lk; + for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ) { + MongoFile* mmf = *i; + if ( !mmf ) + continue; + thingsToFlush.push_back( mmf->prepareFlush() ); + } + } + + for ( size_t i = 0; i < thingsToFlush.size(); i++ ) { + thingsToFlush[i]->flush(); + } + + return thingsToFlush.size(); + } + + void MongoFile::created() { + LockMongoFilesExclusive lk; + mmfiles.insert(this); + } + + void MongoFile::setFilename(const std::string& fn) { + LockMongoFilesExclusive lk; + verify( _filename.empty() ); + _filename = boost::filesystem::absolute(fn).generic_string(); + MongoFile *&ptf = pathToFile[_filename]; + massert(13617, "MongoFile : multiple opens of same filename", ptf == 0); + ptf = this; + } + + MongoFile* MongoFileFinder::findByPath(const std::string& path) const { + return mapFindWithDefault(pathToFile, + boost::filesystem::absolute(path).generic_string(), + static_cast<MongoFile*>(NULL)); + } + + + void printMemInfo( const char * where ) { + LogstreamBuilder out = log(); + out << "mem info: "; + if ( where ) + out << where << " "; + + ProcessInfo pi; + if ( ! 
/**
 * Handler to call when syncing data to disk fails (see the declaration comment in mmap.h).
 * Logs two lines describing the failure and then raises fatal assertion 17346.
 */
void dataSyncFailedHandler() {
    log() << "error syncing data to disk, probably a disk error";
    log() << " shutting down immediately to avoid corruption";
    // presumably does not return (the log text announces an immediate shutdown) -- TODO confirm
    fassertFailed( 17346 );
}
/**
 * Scoped memory-access hint for a range of mapped memory.
 *
 * In the POSIX implementation the constructor rounds the start address down to a page
 * boundary, extends the length accordingly and applies the requested advice with
 * madvise(); the destructor resets the same range to MADV_NORMAL. On Solaris (__sun)
 * both are no-ops.
 */
class MAdvise {
    MONGO_DISALLOW_COPYING(MAdvise);
public:
    // access pattern to announce for the range
    enum Advice { Sequential=1 , Random=2 };
    /** @param p   start of the range (need not be page aligned)
        @param len length of the range in bytes
        @param a   advice to apply while this object is alive
    */
    MAdvise(void *p, unsigned len, Advice a);
    ~MAdvise(); // destructor resets the range to MADV_NORMAL
private:
    void *_p;       // start of the advised range
    unsigned _len;  // length of the advised range
};
+ */ + static unsigned getEra() { return era; } + + static void assertExclusivelyLocked() { mmmutex.assertExclusivelyLocked(); } + static void assertAtLeastReadLocked() { mmmutex.assertAtLeastReadLocked(); } + }; + + class LockMongoFilesExclusive { + RWLockRecursive::Exclusive lk; + public: + LockMongoFilesExclusive() : lk(LockMongoFilesShared::mmmutex) { + LockMongoFilesShared::era++; + } + }; + + /* the administrative-ish stuff here */ + class MongoFile { + MONGO_DISALLOW_COPYING(MongoFile); + public: + /** Flushable has to fail nicely if the underlying object gets killed */ + class Flushable { + public: + virtual ~Flushable() {} + virtual void flush() = 0; + }; + + MongoFile() {} + virtual ~MongoFile() {} + + enum Options { + SEQUENTIAL = 1, // hint - e.g. FILE_FLAG_SEQUENTIAL_SCAN on windows + READONLY = 2 // not contractually guaranteed, but if specified the impl has option to fault writes + }; + + /** @param fun is called for each MongoFile. + called from within a mutex that MongoFile uses. so be careful not to deadlock. + */ + template < class F > + static void forEach( F fun ); + + /** note: you need to be in mmmutex when using this. forEach (above) handles that for you automatically. 
/** look up a MongoFile by filename.  scoped mutex locking convention: the object holds a
    shared files lock (its _lk member) for as long as it is alive.
    example:
      MongoFileFinder finder;
      MongoFile *a = finder.findByPath("file_name_a");
      MongoFile *b = finder.findByPath("file_name_b");
*/
class MongoFileFinder {
    MONGO_DISALLOW_COPYING(MongoFileFinder);
public:
    MongoFileFinder() { }

    /** @return The MongoFile object associated with the specified file name.  If no file is open
                with the specified name, returns null.
    */
    MongoFile* findByPath(const std::string& path) const;

private:
    // shared lock on the file registry, held for the lifetime of the finder
    LockMongoFilesShared _lk;
};
+ // It ensures close() cannot complete while flush() is running + // Lock Ordering: + // LockMongoFilesShared must be taken before _flushMutex if both are taken + boost::mutex _flushMutex; +#endif + + protected: + + /** close the current private view and open a new replacement */ + void* remapPrivateView(void *oldPrivateAddr); + }; + + /** p is called from within a mutex that MongoFile uses. so be careful not to deadlock. */ + template < class F > + inline void MongoFile::forEach( F p ) { + LockMongoFilesShared lklk; + const std::set<MongoFile*>& mmfiles = MongoFile::getAllFiles(); + for ( std::set<MongoFile*>::const_iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ) + p(*i); + } + +} // namespace mongo diff --git a/src/mongo/db/storage/mmap_v1/mmap_posix.cpp b/src/mongo/db/storage/mmap_v1/mmap_posix.cpp new file mode 100644 index 00000000000..f7dffae468f --- /dev/null +++ b/src/mongo/db/storage/mmap_v1/mmap_posix.cpp @@ -0,0 +1,324 @@ +// mmap_posix.cpp + +/* Copyright 2009 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl + +#include "mongo/platform/basic.h" + +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "mongo/platform/atomic_word.h" +#include "mongo/db/concurrency/d_concurrency.h" +#include "mongo/db/storage/mmap_v1/file_allocator.h" +#include "mongo/db/storage/mmap_v1/mmap.h" +#include "mongo/util/log.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/processinfo.h" +#include "mongo/util/startup_test.h" + +using std::endl; +using std::numeric_limits; +using std::vector; + +using namespace mongoutils; + +namespace { + mongo::AtomicUInt64 mmfNextId(0); +} + +namespace mongo { + static size_t fetchMinOSPageSizeBytes() { + size_t minOSPageSizeBytes = sysconf( _SC_PAGESIZE ); + minOSPageSizeBytesTest(minOSPageSizeBytes); + return minOSPageSizeBytes; + } + const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes(); + + + + MemoryMappedFile::MemoryMappedFile() : _uniqueId(mmfNextId.fetchAndAdd(1)) { + fd = 0; + maphandle = 0; + len = 0; + created(); + } + + void MemoryMappedFile::close() { + LockMongoFilesShared::assertExclusivelyLocked(); + for( vector<void*>::iterator i = views.begin(); i != views.end(); i++ ) { + munmap(*i,len); + } + views.clear(); + + if ( fd ) + ::close(fd); + fd = 0; + destroyed(); // cleans up from the master list of mmaps + } + +#ifndef O_NOATIME +#define O_NOATIME (0) +#endif + +#ifndef 
MAP_NORESERVE +#define MAP_NORESERVE (0) +#endif + + namespace { + void* _pageAlign( void* p ) { + return (void*)((int64_t)p & ~(g_minOSPageSizeBytes-1)); + } + + class PageAlignTest : public StartupTest { + public: + void run() { + { + int64_t x = g_minOSPageSizeBytes + 123; + void* y = _pageAlign( reinterpret_cast<void*>( x ) ); + invariant( g_minOSPageSizeBytes == reinterpret_cast<size_t>(y) ); + } + { + int64_t a = static_cast<uint64_t>( numeric_limits<int>::max() ); + a = a / g_minOSPageSizeBytes; + a = a * g_minOSPageSizeBytes; + // a should now be page aligned + + // b is not page aligned + int64_t b = a + 123; + + void* y = _pageAlign( reinterpret_cast<void*>( b ) ); + invariant( a == reinterpret_cast<int64_t>(y) ); + } + + } + } pageAlignTest; + } + +#if defined(__sun) + MAdvise::MAdvise(void *,unsigned, Advice) { } + MAdvise::~MAdvise() { } +#else + MAdvise::MAdvise(void *p, unsigned len, Advice a) { + + _p = _pageAlign( p ); + + _len = len + static_cast<unsigned>( reinterpret_cast<size_t>(p) - + reinterpret_cast<size_t>(_p) ); + + int advice = 0; + switch ( a ) { + case Sequential: + advice = MADV_SEQUENTIAL; + break; + case Random: + advice = MADV_RANDOM; + break; + } + + if ( madvise(_p,_len,advice ) ) { + error() << "madvise failed: " << errnoWithDescription(); + } + + } + MAdvise::~MAdvise() { + madvise(_p,_len,MADV_NORMAL); + } +#endif + + void* MemoryMappedFile::map(const char *filename, unsigned long long &length, int options) { + // length may be updated by callee. 
+ setFilename(filename); + FileAllocator::get()->allocateAsap( filename, length ); + len = length; + + massert( 10446 , str::stream() << "mmap: can't map area of size 0 file: " << filename, length > 0 ); + + fd = open(filename, O_RDWR | O_NOATIME); + if ( fd <= 0 ) { + log() << "couldn't open " << filename << ' ' << errnoWithDescription() << endl; + fd = 0; // our sentinel for not opened + return 0; + } + + unsigned long long filelen = lseek(fd, 0, SEEK_END); + uassert(10447, str::stream() << "map file alloc failed, wanted: " << length << " filelen: " << filelen << ' ' << sizeof(size_t), filelen == length ); + lseek( fd, 0, SEEK_SET ); + + void * view = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if ( view == MAP_FAILED ) { + error() << " mmap() failed for " << filename << " len:" << length << " " << errnoWithDescription() << endl; + if ( errno == ENOMEM ) { + if( sizeof(void*) == 4 ) + error() << "mmap failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl; + else + error() << "mmap failed with out of memory. (64 bit build)" << endl; + } + return 0; + } + + +#if defined(__sun) +#warning madvise not supported on solaris yet +#else + if ( options & SEQUENTIAL ) { + if ( madvise( view , length , MADV_SEQUENTIAL ) ) { + warning() << "map: madvise failed for " << filename << ' ' << errnoWithDescription() << endl; + } + } +#endif + + views.push_back( view ); + + return view; + } + + void* MemoryMappedFile::createReadOnlyMap() { + void * x = mmap( /*start*/0 , len , PROT_READ , MAP_SHARED , fd , 0 ); + if( x == MAP_FAILED ) { + if ( errno == ENOMEM ) { + if( sizeof(void*) == 4 ) + error() << "mmap ro failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl; + else + error() << "mmap ro failed with out of memory. 
(64 bit build)" << endl; + } + return 0; + } + return x; + } + + void* MemoryMappedFile::createPrivateMap() { + void * x = mmap( /*start*/0 , len , PROT_READ|PROT_WRITE , MAP_PRIVATE|MAP_NORESERVE , fd , 0 ); + if( x == MAP_FAILED ) { + if ( errno == ENOMEM ) { + if( sizeof(void*) == 4 ) { + error() << "mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl; + } + else { + error() << "mmap private failed with out of memory. (64 bit build)" << endl; + } + } + else { + error() << "mmap private failed " << errnoWithDescription() << endl; + } + return 0; + } + + views.push_back(x); + return x; + } + + void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) { +#if defined(__sun) // SERVER-8795 + LockMongoFilesExclusive lockMongoFiles; +#endif + + // don't unmap, just mmap over the old region + void * x = mmap( oldPrivateAddr, len , PROT_READ|PROT_WRITE , MAP_PRIVATE|MAP_NORESERVE|MAP_FIXED , fd , 0 ); + if( x == MAP_FAILED ) { + int err = errno; + error() << "13601 Couldn't remap private view: " << errnoWithDescription(err) << endl; + log() << "aborting" << endl; + printMemInfo(); + abort(); + } + verify( x == oldPrivateAddr ); + return x; + } + + void MemoryMappedFile::flush(bool sync) { + if ( views.empty() || fd == 0 ) + return; + + bool useFsync = sync && !ProcessInfo::preferMsyncOverFSync(); + + if ( useFsync ? + fsync(fd) != 0 : + msync(viewForFlushing(), len, sync ? MS_SYNC : MS_ASYNC) ) { + // msync failed, this is very bad + log() << (useFsync ? 
"fsync failed: " : "msync failed: ") << errnoWithDescription() + << " file: " << filename() << endl; + dataSyncFailedHandler(); + } + } + + class PosixFlushable : public MemoryMappedFile::Flushable { + public: + PosixFlushable( MemoryMappedFile* theFile, void* view , HANDLE fd , long len) + : _theFile( theFile ), _view( view ), _fd(fd), _len(len), _id(_theFile->getUniqueId()) { + } + + void flush() { + if ( _view == NULL || _fd == 0 ) + return; + + if ( ProcessInfo::preferMsyncOverFSync() ? + msync(_view, _len, MS_SYNC ) == 0 : + fsync(_fd) == 0 ) { + return; + } + + if ( errno == EBADF ) { + // ok, we were unlocked, so this file was closed + return; + } + + // some error, lets see if we're supposed to exist + LockMongoFilesShared mmfilesLock; + std::set<MongoFile*> mmfs = MongoFile::getAllFiles(); + std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile); + if ( (it == mmfs.end()) || ((*it)->getUniqueId() != _id) ) { + log() << "msync failed with: " << errnoWithDescription() + << " but file doesn't exist anymore, so ignoring"; + // this was deleted while we were unlocked + return; + } + + // we got an error, and we still exist, so this is bad, we fail + log() << "msync " << errnoWithDescription() << endl; + dataSyncFailedHandler(); + } + + MemoryMappedFile* _theFile; + void * _view; + HANDLE _fd; + long _len; + const uint64_t _id; + }; + + MemoryMappedFile::Flushable * MemoryMappedFile::prepareFlush() { + return new PosixFlushable( this, viewForFlushing(), fd, len); + } + + +} // namespace mongo + diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp index f84f7810bc3..595df3616f3 100644 --- a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp +++ b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp @@ -37,6 +37,7 @@ #include <fstream> #include "mongo/db/mongod_options.h" +#include "mongo/db/storage/mmap_v1/mmap.h" #include "mongo/db/storage/mmap_v1/data_file_sync.h" #include "mongo/db/storage/mmap_v1/dur.h" 
#include "mongo/db/storage/mmap_v1/dur_journal.h" @@ -46,9 +47,8 @@ #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" #include "mongo/db/storage/storage_engine_lock_file.h" #include "mongo/db/storage_options.h" -#include "mongo/util/file_allocator.h" +#include "mongo/db/storage/mmap_v1/file_allocator.h" #include "mongo/util/log.h" -#include "mongo/util/mmap.h" namespace mongo { diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp index 17a72099531..01a13ee86a9 100644 --- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp +++ b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp @@ -43,6 +43,7 @@ #include "mongo/db/storage/mmap_v1/record.h" #include "mongo/db/storage/mmap_v1/extent.h" #include "mongo/db/storage/mmap_v1/extent_manager.h" +#include "mongo/db/storage/mmap_v1/mmap.h" #include "mongo/db/storage/mmap_v1/mmap_v1_engine.h" #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" #include "mongo/db/storage/record_fetcher.h" @@ -50,7 +51,6 @@ #include "mongo/util/fail_point_service.h" #include "mongo/util/file.h" #include "mongo/util/log.h" -#include "mongo/util/mmap.h" namespace mongo { diff --git a/src/mongo/db/storage/mmap_v1/mmap_windows.cpp b/src/mongo/db/storage/mmap_v1/mmap_windows.cpp new file mode 100644 index 00000000000..2ce46d43584 --- /dev/null +++ b/src/mongo/db/storage/mmap_v1/mmap_windows.cpp @@ -0,0 +1,539 @@ +// mmap_win.cpp + +/* Copyright 2009 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/mmap_v1/mmap.h" + +#include "mongo/db/storage/mmap_v1/durable_mapped_file.h" +#include "mongo/db/storage/mmap_v1/file_allocator.h" +#include "mongo/util/log.h" +#include "mongo/util/processinfo.h" +#include "mongo/util/text.h" +#include "mongo/util/timer.h" + +namespace mongo { + + using std::endl; + using std::string; + using std::vector; + + namespace { + mongo::AtomicUInt64 mmfNextId(0); + } + + static size_t fetchMinOSPageSizeBytes() { + SYSTEM_INFO si; + GetSystemInfo(&si); + size_t minOSPageSizeBytes = si.dwPageSize; + minOSPageSizeBytesTest(minOSPageSizeBytes); + return minOSPageSizeBytes; + } + const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes(); + + // MapViewMutex + // + // Protects: + // 1. 
Ensures all MapViewOfFile/UnMapViewOfFile operations are serialized to reduce chance of + // "address in use" errors (error code 487) + // - These errors can still occur if the memory is used for other purposes + // (stack storage, heap) + // 2. Prevents calls to VirtualProtect while we remapping files. + // Lock Ordering: + // - If taken, must be after previewViews._m to prevent deadlocks + mutex mapViewMutex; + + MAdvise::MAdvise(void *,unsigned, Advice) { } + MAdvise::~MAdvise() { } + + const unsigned long long memoryMappedFileLocationFloor = 256LL * 1024LL * 1024LL * 1024LL; + static unsigned long long _nextMemoryMappedFileLocation = memoryMappedFileLocationFloor; + + // nextMemoryMappedFileLocationMutex + // + // Protects: + // Windows 64-bit specific allocation of virtual memory regions for + // placing memory mapped files in memory + // Lock Ordering: + // No restrictions + static SimpleMutex _nextMemoryMappedFileLocationMutex("nextMemoryMappedFileLocationMutex"); + + unsigned long long AlignNumber(unsigned long long number, unsigned long long granularity) + { + return (number + granularity - 1) & ~(granularity - 1); + } + + static void* getNextMemoryMappedFileLocation(unsigned long long mmfSize) { + if (4 == sizeof(void*)) { + return 0; + } + SimpleMutex::scoped_lock lk(_nextMemoryMappedFileLocationMutex); + + static unsigned long long granularity = 0; + + if (0 == granularity) { + SYSTEM_INFO systemInfo; + GetSystemInfo(&systemInfo); + granularity = static_cast<unsigned long long>(systemInfo.dwAllocationGranularity); + } + + unsigned long long thisMemoryMappedFileLocation = _nextMemoryMappedFileLocation; + + int current_retry = 1; + + while (true) { + MEMORY_BASIC_INFORMATION memInfo; + + if (VirtualQuery(reinterpret_cast<LPCVOID>(thisMemoryMappedFileLocation), + &memInfo, sizeof(memInfo)) == 0) { + DWORD gle = GetLastError(); + + // If we exceed the limits of Virtual Memory + // - 8TB before Windows 8.1/2012 R2, 128 TB after + // restart scanning from our 
memory mapped floor once more + // This is a linear scan of regions, not of every VM page + if (gle == ERROR_INVALID_PARAMETER && current_retry == 1) { + thisMemoryMappedFileLocation = memoryMappedFileLocationFloor; + ++current_retry; + continue; + } + + log() << "VirtualQuery of " << thisMemoryMappedFileLocation + << " failed with error " << errnoWithDescription(gle); + fassertFailed(17484); + } + + // Free memory regions that we can use for memory map files + // 1. Marked MEM_FREE, not MEM_RESERVE + // 2. Marked as PAGE_NOACCESS, not anything else + if (memInfo.Protect == PAGE_NOACCESS && + memInfo.State == MEM_FREE && + memInfo.RegionSize > mmfSize) + break; + + thisMemoryMappedFileLocation = reinterpret_cast<unsigned long long>(memInfo.BaseAddress) + + memInfo.RegionSize; + } + + _nextMemoryMappedFileLocation = thisMemoryMappedFileLocation + + AlignNumber(mmfSize, granularity); + + return reinterpret_cast<void*>(static_cast<uintptr_t>(thisMemoryMappedFileLocation)); + } + + MemoryMappedFile::MemoryMappedFile() + : _uniqueId(mmfNextId.fetchAndAdd(1)), + fd(0), + maphandle(0), + len(0) { + + created(); + } + + void MemoryMappedFile::close() { + LockMongoFilesShared::assertExclusivelyLocked(); + + // Prevent flush and close from concurrently running + boost::lock_guard<boost::mutex> lk(_flushMutex); + + { + boost::lock_guard<boost::mutex> lk(mapViewMutex); + + for (vector<void*>::iterator i = views.begin(); i != views.end(); i++) { + UnmapViewOfFile(*i); + } + } + + views.clear(); + if ( maphandle ) + CloseHandle(maphandle); + maphandle = 0; + if ( fd ) + CloseHandle(fd); + fd = 0; + destroyed(); // cleans up from the master list of mmaps + } + + unsigned long long mapped = 0; + + void* MemoryMappedFile::createReadOnlyMap() { + verify( maphandle ); + + boost::lock_guard<boost::mutex> lk(mapViewMutex); + + void* readOnlyMapAddress = NULL; + int current_retry = 0; + + while (true) { + + LPVOID thisAddress = getNextMemoryMappedFileLocation(len); + + 
readOnlyMapAddress = MapViewOfFileEx( + maphandle, // file mapping handle + FILE_MAP_READ, // access + 0, 0, // file offset, high and low + 0, // bytes to map, 0 == all + thisAddress); // address to place file + + if (0 == readOnlyMapAddress) { + DWORD dosError = GetLastError(); + + ++current_retry; + + // If we failed to allocate a memory mapped file, try again in case we picked + // an address that Windows is also trying to use for some other VM allocations + if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) { + continue; + } + + log() << "MapViewOfFileEx for " << filename() + << " at address " << thisAddress + << " failed with error " << errnoWithDescription(dosError) + << " (file size is " << len << ")" + << " in MemoryMappedFile::createReadOnlyMap" + << endl; + + fassertFailed(16165); + } + + break; + } + + views.push_back( readOnlyMapAddress ); + return readOnlyMapAddress; + } + + void* MemoryMappedFile::map(const char *filenameIn, unsigned long long &length, int options) { + verify( fd == 0 && len == 0 ); // can't open more than once + setFilename(filenameIn); + FileAllocator::get()->allocateAsap( filenameIn, length ); + /* big hack here: Babble uses db names with colons. doesn't seem to work on windows. temporary perhaps. 
*/ + char filename[256]; + strncpy(filename, filenameIn, 255); + filename[255] = 0; + { + size_t len = strlen( filename ); + for ( size_t i=len-1; i>=0; i-- ) { + if ( filename[i] == '/' || + filename[i] == '\\' ) + break; + + if ( filename[i] == ':' ) + filename[i] = '_'; + } + } + + updateLength( filename, length ); + + { + DWORD createOptions = FILE_ATTRIBUTE_NORMAL; + if ( options & SEQUENTIAL ) + createOptions |= FILE_FLAG_SEQUENTIAL_SCAN; + DWORD rw = GENERIC_READ | GENERIC_WRITE; + fd = CreateFileW( + toWideString(filename).c_str(), + rw, // desired access + FILE_SHARE_WRITE | FILE_SHARE_READ, // share mode + NULL, // security + OPEN_ALWAYS, // create disposition + createOptions , // flags + NULL); // hTempl + if ( fd == INVALID_HANDLE_VALUE ) { + DWORD dosError = GetLastError(); + log() << "CreateFileW for " << filename + << " failed with " << errnoWithDescription( dosError ) + << " (file size is " << length << ")" + << " in MemoryMappedFile::map" + << endl; + return 0; + } + } + + mapped += length; + + { + DWORD flProtect = PAGE_READWRITE; //(options & READONLY)?PAGE_READONLY:PAGE_READWRITE; + maphandle = CreateFileMappingW(fd, NULL, flProtect, + length >> 32 /*maxsizehigh*/, + (unsigned) length /*maxsizelow*/, + NULL/*lpName*/); + if ( maphandle == NULL ) { + DWORD dosError = GetLastError(); + log() << "CreateFileMappingW for " << filename + << " failed with " << errnoWithDescription( dosError ) + << " (file size is " << length << ")" + << " in MemoryMappedFile::map" + << endl; + close(); + fassertFailed( 16225 ); + } + } + + void *view = 0; + { + boost::lock_guard<boost::mutex> lk(mapViewMutex); + DWORD access = ( options & READONLY ) ? 
FILE_MAP_READ : FILE_MAP_ALL_ACCESS; + + int current_retry = 0; + while (true) { + + LPVOID thisAddress = getNextMemoryMappedFileLocation(length); + + view = MapViewOfFileEx( + maphandle, // file mapping handle + access, // access + 0, 0, // file offset, high and low + 0, // bytes to map, 0 == all + thisAddress); // address to place file + + if (view == 0) { + DWORD dosError = GetLastError(); + + ++current_retry; + + // If we failed to allocate a memory mapped file, try again in case we picked + // an address that Windows is also trying to use for some other VM allocations + if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) { + continue; + } + +#ifndef _WIN64 + // Warn user that if they are running a 32-bit app on 64-bit Windows + if (dosError == ERROR_NOT_ENOUGH_MEMORY) { + BOOL wow64Process; + BOOL retWow64 = IsWow64Process(GetCurrentProcess(), &wow64Process); + if (retWow64 && wow64Process) { + log() << "This is a 32-bit MongoDB binary running on a 64-bit" + " operating system that has run out of virtual memory for" + " databases. 
Switch to a 64-bit build of MongoDB to open" + " the databases."; + } + } +#endif + + log() << "MapViewOfFileEx for " << filename + << " at address " << thisAddress + << " failed with " << errnoWithDescription(dosError) + << " (file size is " << length << ")" + << " in MemoryMappedFile::map" + << endl; + + close(); + fassertFailed(16166); + } + + break; + } + } + + views.push_back(view); + len = length; + return view; + } + + extern mutex mapViewMutex; + + void* MemoryMappedFile::createPrivateMap() { + verify( maphandle ); + + boost::lock_guard<boost::mutex> lk(mapViewMutex); + + LPVOID thisAddress = getNextMemoryMappedFileLocation( len ); + + void* privateMapAddress = NULL; + int current_retry = 0; + + while (true) { + + privateMapAddress = MapViewOfFileEx( + maphandle, // file mapping handle + FILE_MAP_READ, // access + 0, 0, // file offset, high and low + 0, // bytes to map, 0 == all + thisAddress); // address to place file + + if (privateMapAddress == 0) { + DWORD dosError = GetLastError(); + + ++current_retry; + + // If we failed to allocate a memory mapped file, try again in case we picked + // an address that Windows is also trying to use for some other VM allocations + if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) { + continue; + } + + log() << "MapViewOfFileEx for " << filename() + << " failed with error " << errnoWithDescription(dosError) + << " (file size is " << len << ")" + << " in MemoryMappedFile::createPrivateMap" + << endl; + + fassertFailed(16167); + } + + break; + } + + views.push_back( privateMapAddress ); + return privateMapAddress; + } + + void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) { + LockMongoFilesExclusive lockMongoFiles; + + privateViews.clearWritableBits(oldPrivateAddr, len); + + boost::lock_guard<boost::mutex> lk(mapViewMutex); + + if( !UnmapViewOfFile(oldPrivateAddr) ) { + DWORD dosError = GetLastError(); + log() << "UnMapViewOfFile for " << filename() + << " failed with error " << errnoWithDescription( 
dosError ) + << " in MemoryMappedFile::remapPrivateView" + << endl; + fassertFailed( 16168 ); + } + + void* newPrivateView = MapViewOfFileEx( + maphandle, // file mapping handle + FILE_MAP_READ, // access + 0, 0, // file offset, high and low + 0, // bytes to map, 0 == all + oldPrivateAddr ); // we want the same address we had before + if ( 0 == newPrivateView ) { + DWORD dosError = GetLastError(); + log() << "MapViewOfFileEx for " << filename() + << " failed with error " << errnoWithDescription( dosError ) + << " (file size is " << len << ")" + << " in MemoryMappedFile::remapPrivateView" + << endl; + } + fassert( 16148, newPrivateView == oldPrivateAddr ); + return newPrivateView; + } + + class WindowsFlushable : public MemoryMappedFile::Flushable { + public: + WindowsFlushable( MemoryMappedFile* theFile, + void * view, + HANDLE fd, + const uint64_t id, + const std::string& filename, + boost::mutex& flushMutex ) + : _theFile(theFile), _view(view), _fd(fd), _id(id), _filename(filename), + _flushMutex(flushMutex) + {} + + void flush() { + if (!_view || !_fd) + return; + + { + LockMongoFilesShared mmfilesLock; + + std::set<MongoFile*> mmfs = MongoFile::getAllFiles(); + std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile); + if ( it == mmfs.end() || (*it)->getUniqueId() != _id ) { + // this was deleted while we were unlocked + return; + } + + // Hold the flush mutex to ensure the file is not closed during flush + _flushMutex.lock(); + } + + boost::lock_guard<boost::mutex> lk(_flushMutex, boost::adopt_lock_t()); + + int loopCount = 0; + bool success = false; + bool timeout = false; + int dosError = ERROR_SUCCESS; + const int maximumTimeInSeconds = 60 * 15; + Timer t; + while ( !success && !timeout ) { + ++loopCount; + success = FALSE != FlushViewOfFile( _view, 0 ); + if ( !success ) { + dosError = GetLastError(); + if ( dosError != ERROR_LOCK_VIOLATION ) { + break; + } + timeout = t.seconds() > maximumTimeInSeconds; + } + } + if ( success && loopCount > 1 ) { + 
log() << "FlushViewOfFile for " << _filename + << " succeeded after " << loopCount + << " attempts taking " << t.millis() + << "ms" << endl; + } + else if ( !success ) { + log() << "FlushViewOfFile for " << _filename + << " failed with error " << dosError + << " after " << loopCount + << " attempts taking " << t.millis() + << "ms" << endl; + // Abort here to avoid data corruption + fassert(16387, false); + } + + success = FALSE != FlushFileBuffers(_fd); + if (!success) { + int err = GetLastError(); + log() << "FlushFileBuffers failed: " << errnoWithDescription( err ) + << " file: " << _filename << endl; + dataSyncFailedHandler(); + } + } + + MemoryMappedFile* _theFile; // this may be deleted while we are running + void * _view; + HANDLE _fd; + const uint64_t _id; + string _filename; + boost::mutex& _flushMutex; + }; + + void MemoryMappedFile::flush(bool sync) { + uassert(13056, "Async flushing not supported on windows", sync); + if( !views.empty() ) { + WindowsFlushable f(this, viewForFlushing(), fd, _uniqueId, filename(), _flushMutex); + f.flush(); + } + } + + MemoryMappedFile::Flushable * MemoryMappedFile::prepareFlush() { + return new WindowsFlushable(this, viewForFlushing(), fd, _uniqueId, + filename(), _flushMutex); + } + +} diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp index 76eef273815..45143deb2ec 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp @@ -35,10 +35,10 @@ #include "mongo/db/operation_context_impl.h" #include "mongo/db/storage/mmap_v1/extent.h" #include "mongo/db/storage/mmap_v1/extent_manager.h" +#include "mongo/db/storage/mmap_v1/mmap.h" #include "mongo/db/storage/mmap_v1/record.h" #include "mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h" #include "mongo/util/log.h" -#include "mongo/util/mmap.h" #include "mongo/util/mongoutils/str.h" /* diff --git 
a/src/mongo/db/storage/mmap_v1/repair_database.cpp b/src/mongo/db/storage/mmap_v1/repair_database.cpp index e8d05a3d352..777f8f25b05 100644 --- a/src/mongo/db/storage/mmap_v1/repair_database.cpp +++ b/src/mongo/db/storage/mmap_v1/repair_database.cpp @@ -46,12 +46,12 @@ #include "mongo/db/db_raii.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/storage/mmap_v1/dur.h" +#include "mongo/db/storage/mmap_v1/mmap.h" #include "mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h" #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" #include "mongo/util/file.h" -#include "mongo/util/file_allocator.h" +#include "mongo/db/storage/mmap_v1/file_allocator.h" #include "mongo/util/log.h" -#include "mongo/util/mmap.h" #include "mongo/util/scopeguard.h" namespace mongo { diff --git a/src/mongo/db/storage/paths.cpp b/src/mongo/db/storage/paths.cpp new file mode 100644 index 00000000000..cb2913c6b06 --- /dev/null +++ b/src/mongo/db/storage/paths.cpp @@ -0,0 +1,113 @@ +/* Copyright 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/paths.h" + +#include "mongo/util/log.h" + +namespace mongo { + + /** from a full path */ + RelativePath RelativePath::fromFullPath(boost::filesystem::path dbp, + boost::filesystem::path f) { + // filesystem::path normalizes / and backslash + std::string fullpath = f.string(); + std::string relative = str::after(fullpath, dbp.string()); + if( relative.empty() ) { + log() << "warning file is not under db path? " << fullpath << ' ' << dbp.string(); + RelativePath rp; + rp._p = fullpath; + return rp; + } + if( str::startsWith(relative, "/") || str::startsWith(relative, "\\") ) { + relative.erase(0, 1); + } + RelativePath rp; + rp._p = relative; + return rp; + } + + dev_t getPartition(const std::string& path){ + struct stat stats; + + if (stat(path.c_str(), &stats) != 0){ + uasserted(13646, str::stream() << "stat() failed for file: " << path << " " << errnoWithDescription()); + } + + return stats.st_dev; + } + + void flushMyDirectory(const boost::filesystem::path& file) { +#ifdef __linux__ // this isn't needed elsewhere + static bool _warnedAboutFilesystem = false; + // if called without a fully qualified path it asserts; that makes mongoperf fail. + // so make a warning. need a better solution longer term. 
+ // massert(13652, str::stream() << "Couldn't find parent dir for file: " << file.string(),); + if (!file.has_branch_path()) { + log() << "warning flushMyDirectory couldn't find parent dir for file: " + << file.string(); + return; + } + + + boost::filesystem::path dir = file.branch_path(); // parent_path in new boosts + + LOG(1) << "flushing directory " << dir.string(); + + int fd = ::open(dir.string().c_str(), O_RDONLY); // DO NOT THROW OR ASSERT BEFORE CLOSING + massert(13650, str::stream() << "Couldn't open directory '" << dir.string() + << "' for flushing: " << errnoWithDescription(), + fd >= 0); + if (fsync(fd) != 0) { + int e = errno; + if (e == EINVAL) { // indicates filesystem does not support synchronization + if (!_warnedAboutFilesystem) { + log() << "\tWARNING: This file system is not supported. For further information" + << " see:" + << startupWarningsLog; + log() << "\t\t\thttp://dochub.mongodb.org/core/unsupported-filesystems" + << startupWarningsLog; + log() << "\t\tPlease notify MongoDB, Inc. if an unlisted filesystem generated " + << "this warning." << startupWarningsLog; + _warnedAboutFilesystem = true; + } + } + else { + close(fd); + massert(13651, str::stream() << "Couldn't fsync directory '" << dir.string() + << "': " << errnoWithDescription(e), + false); + } + } + close(fd); +#endif + } +} diff --git a/src/mongo/db/storage/paths.h b/src/mongo/db/storage/paths.h new file mode 100644 index 00000000000..8286c920566 --- /dev/null +++ b/src/mongo/db/storage/paths.h @@ -0,0 +1,93 @@ +// @file paths.h +// file paths and directory handling + +/* Copyright 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +#include <boost/filesystem/path.hpp> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "mongo/util/mongoutils/str.h" + +#include "mongo/db/storage_options.h" + +namespace mongo { + + using namespace mongoutils; + + /** this is very much like a boost::path. however, we define a new type to get some type + checking. if you want to say 'my param MUST be a relative path", use this. + */ + struct RelativePath { + std::string _p; + + bool empty() const { return _p.empty(); } + + static RelativePath fromRelativePath(const std::string& f) { + RelativePath rp; + rp._p = f; + return rp; + } + + /** + * Returns path relative to 'dbpath' from a full path 'f'. 
+ */ + static RelativePath fromFullPath(boost::filesystem::path dbpath, + boost::filesystem::path f); + + std::string toString() const { return _p; } + + bool operator!=(const RelativePath& r) const { return _p != r._p; } + bool operator==(const RelativePath& r) const { return _p == r._p; } + bool operator<(const RelativePath& r) const { return _p < r._p; } + + std::string asFullPath() const { + boost::filesystem::path x(storageGlobalParams.dbpath); + x /= _p; + return x.string(); + } + + }; + + dev_t getPartition(const std::string& path); + + inline bool onSamePartition(const std::string& path1, const std::string& path2){ + dev_t dev1 = getPartition(path1); + dev_t dev2 = getPartition(path2); + + return dev1 == dev2; + } + + void flushMyDirectory(const boost::filesystem::path& file); + + boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p); + +} diff --git a/src/mongo/db/storage/storage_engine_lock_file_posix.cpp b/src/mongo/db/storage/storage_engine_lock_file_posix.cpp index 93227bbaf9a..6897f413fbe 100644 --- a/src/mongo/db/storage/storage_engine_lock_file_posix.cpp +++ b/src/mongo/db/storage/storage_engine_lock_file_posix.cpp @@ -41,9 +41,9 @@ #include <unistd.h> #include <sstream> +#include "mongo/db/storage/paths.h" #include "mongo/platform/process_id.h" #include "mongo/util/log.h" -#include "mongo/util/paths.h" #include "mongo/util/mongoutils/str.h" namespace mongo { |