// mmap_posix.cpp
/* Copyright 2009 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects
* for all of the code used other than as permitted herein. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you do not
* wish to do so, delete this exception statement from your version. If you
* delete this exception statement from all source files in the program,
* then also delete it in the license file.
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
#include "mongo/platform/basic.h"
#include
#include
#include
#include
#include
#include "mongo/platform/atomic_word.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/storage/mmap_v1/file_allocator.h"
#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/processinfo.h"
#include "mongo/util/startup_test.h"
using std::endl;
using std::numeric_limits;
using std::vector;
using namespace mongoutils;
namespace mongo {
static size_t fetchMinOSPageSizeBytes() {
size_t minOSPageSizeBytes = sysconf(_SC_PAGESIZE);
minOSPageSizeBytesTest(minOSPageSizeBytes);
return minOSPageSizeBytes;
}
const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes();
void MemoryMappedFile::close() {
LockMongoFilesShared::assertExclusivelyLocked();
for (vector::iterator i = views.begin(); i != views.end(); i++) {
munmap(*i, len);
}
views.clear();
if (fd)
::close(fd);
fd = 0;
destroyed(); // cleans up from the master list of mmaps
}
#ifndef O_NOATIME
#define O_NOATIME (0)
#endif
#ifndef MAP_NORESERVE
#define MAP_NORESERVE (0)
#endif
namespace {
void* _pageAlign(void* p) {
return (void*)((int64_t)p & ~(g_minOSPageSizeBytes - 1));
}
class PageAlignTest : public StartupTest {
public:
void run() {
{
int64_t x = g_minOSPageSizeBytes + 123;
void* y = _pageAlign(reinterpret_cast(x));
invariant(g_minOSPageSizeBytes == reinterpret_cast(y));
}
{
int64_t a = static_cast(numeric_limits::max());
a = a / g_minOSPageSizeBytes;
a = a * g_minOSPageSizeBytes;
// a should now be page aligned
// b is not page aligned
int64_t b = a + 123;
void* y = _pageAlign(reinterpret_cast(b));
invariant(a == reinterpret_cast(y));
}
}
} pageAlignTest;
}
#if defined(__sun)
MAdvise::MAdvise(void*, unsigned, Advice) {}
MAdvise::~MAdvise() {}
#else
MAdvise::MAdvise(void* p, unsigned len, Advice a) {
_p = _pageAlign(p);
_len = len + static_cast(reinterpret_cast(p) - reinterpret_cast(_p));
int advice = 0;
switch (a) {
case Sequential:
advice = MADV_SEQUENTIAL;
break;
case Random:
advice = MADV_RANDOM;
break;
}
if (madvise(_p, _len, advice)) {
error() << "madvise failed: " << errnoWithDescription();
}
}
MAdvise::~MAdvise() {
madvise(_p, _len, MADV_NORMAL);
}
#endif
void* MemoryMappedFile::map(const char* filename, unsigned long long& length) {
// length may be updated by callee.
setFilename(filename);
FileAllocator::get()->allocateAsap(filename, length);
len = length;
const bool readOnly = isOptionSet(READONLY);
massert(
10446, str::stream() << "mmap: can't map area of size 0 file: " << filename, length > 0);
const int posixOpenOpts = O_NOATIME | (readOnly ? O_RDONLY : O_RDWR);
fd = ::open(filename, posixOpenOpts);
if (fd <= 0) {
log() << "couldn't open " << filename << ' ' << errnoWithDescription() << endl;
fd = 0; // our sentinel for not opened
return 0;
}
unsigned long long filelen = lseek(fd, 0, SEEK_END);
uassert(10447,
str::stream() << "map file alloc failed, wanted: " << length << " filelen: " << filelen
<< ' ' << sizeof(size_t),
filelen == length);
lseek(fd, 0, SEEK_SET);
const int mmapProtectionOpts = readOnly ? PROT_READ : (PROT_READ | PROT_WRITE);
void* view = mmap(NULL, length, mmapProtectionOpts, MAP_SHARED, fd, 0);
if (view == MAP_FAILED) {
error() << " mmap() failed for " << filename << " len:" << length << " "
<< errnoWithDescription() << endl;
if (errno == ENOMEM) {
if (sizeof(void*) == 4)
error() << "mmap failed with out of memory. You are using a 32-bit build and "
"probably need to upgrade to 64" << endl;
else
error() << "mmap failed with out of memory. (64 bit build)" << endl;
}
return 0;
}
#if !defined(__sun)
if (isOptionSet(SEQUENTIAL)) {
if (madvise(view, length, MADV_SEQUENTIAL)) {
warning() << "map: madvise failed for " << filename << ' ' << errnoWithDescription()
<< endl;
}
}
#endif
views.push_back(view);
return view;
}
void* MemoryMappedFile::createPrivateMap() {
void* x = mmap(/*start*/ 0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_NORESERVE, fd, 0);
if (x == MAP_FAILED) {
if (errno == ENOMEM) {
if (sizeof(void*) == 4) {
error() << "mmap private failed with out of memory. You are using a 32-bit build "
"and probably need to upgrade to 64" << endl;
} else {
error() << "mmap private failed with out of memory. (64 bit build)" << endl;
}
} else {
error() << "mmap private failed " << errnoWithDescription() << endl;
}
return 0;
}
views.push_back(x);
return x;
}
void* MemoryMappedFile::remapPrivateView(void* oldPrivateAddr) {
#if defined(__sun) // SERVER-8795
LockMongoFilesExclusive lockMongoFiles;
#endif
// don't unmap, just mmap over the old region
void* x = mmap(oldPrivateAddr,
len,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_NORESERVE | MAP_FIXED,
fd,
0);
if (x == MAP_FAILED) {
int err = errno;
error() << "13601 Couldn't remap private view: " << errnoWithDescription(err) << endl;
log() << "aborting" << endl;
printMemInfo();
abort();
}
verify(x == oldPrivateAddr);
return x;
}
void MemoryMappedFile::flush(bool sync) {
if (views.empty() || fd == 0 || !sync)
return;
bool useFsync = !ProcessInfo::preferMsyncOverFSync();
if (useFsync ? fsync(fd) != 0 : msync(viewForFlushing(), len, MS_SYNC) != 0) {
// msync failed, this is very bad
log() << (useFsync ? "fsync failed: " : "msync failed: ") << errnoWithDescription()
<< " file: " << filename() << endl;
dataSyncFailedHandler();
}
}
class PosixFlushable : public MemoryMappedFile::Flushable {
public:
PosixFlushable(MemoryMappedFile* theFile, void* view, HANDLE fd, long len)
: _theFile(theFile), _view(view), _fd(fd), _len(len), _id(_theFile->getUniqueId()) {}
void flush() {
if (_view == NULL || _fd == 0)
return;
if (ProcessInfo::preferMsyncOverFSync() ? msync(_view, _len, MS_SYNC) == 0
: fsync(_fd) == 0) {
return;
}
if (errno == EBADF) {
// ok, we were unlocked, so this file was closed
return;
}
// some error, lets see if we're supposed to exist
LockMongoFilesShared mmfilesLock;
std::set mmfs = MongoFile::getAllFiles();
std::set::const_iterator it = mmfs.find(_theFile);
if ((it == mmfs.end()) || ((*it)->getUniqueId() != _id)) {
log() << "msync failed with: " << errnoWithDescription()
<< " but file doesn't exist anymore, so ignoring";
// this was deleted while we were unlocked
return;
}
// we got an error, and we still exist, so this is bad, we fail
log() << "msync " << errnoWithDescription() << endl;
dataSyncFailedHandler();
}
MemoryMappedFile* _theFile;
void* _view;
HANDLE _fd;
long _len;
const uint64_t _id;
};
MemoryMappedFile::Flushable* MemoryMappedFile::prepareFlush() {
return new PosixFlushable(this, viewForFlushing(), fd, len);
}
} // namespace mongo