// mmap_v1_engine.cpp
/**
* Copyright (C) 2014 MongoDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
#include "mongo/db/storage/mmap_v1/mmap_v1_engine.h"
#include
#include
#include
#include "mongo/db/mongod_options.h"
#include "mongo/db/storage/mmap_v1/data_file_sync.h"
#include "mongo/db/storage/mmap_v1/dur.h"
#include "mongo/db/storage/mmap_v1/dur_journal.h"
#include "mongo/db/storage/mmap_v1/dur_recover.h"
#include "mongo/db/storage/mmap_v1/dur_recovery_unit.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage/storage_engine_lock_file.h"
#include "mongo/db/storage_options.h"
#include "mongo/util/file_allocator.h"
#include "mongo/util/log.h"
#include "mongo/util/mmap.h"
namespace mongo {
using std::endl;
using std::ifstream;
using std::string;
using std::stringstream;
using std::vector;
namespace {
#if !defined(__sunos__)
// Startup safety check run from the MMAPV1Engine constructor (non-Solaris build).
//
// If the lock file reports that it was left behind by an unclean shutdown, decides whether it
// is safe to continue based on the journal files present and the startup options; when it is
// not, logs an explanatory message and throws user assertion 12596. Independently of the lock
// file, refuses to start (user assertion 13597) when journal files exist but journaling is
// disabled.
//
// storageEngine - used to enumerate the databases that exist on disk.
// doingRepair   - if true, don't consider an unclean shutdown an error (unless there are
//                 journal files that still need to be replayed).
// lockFile      - the storage engine lock file for the dbpath.
void acquirePathLock(MMAPV1Engine* storageEngine,
                     bool doingRepair,
                     const StorageEngineLockFile& lockFile) {
    const string name = lockFile.getFilespec();
    const bool oldFile = lockFile.createdByUncleanShutdown();

    if (oldFile) {
        // we check this here because we want to see if we can get the lock
        // if we can't, then it's probably just another mongod running
        string errmsg;

        if (doingRepair && dur::haveJournalFiles()) {
            errmsg =
                "************** \n"
                "You specified --repair but there are dirty journal files. Please\n"
                "restart without --repair to allow the journal files to be replayed.\n"
                "If you wish to repair all databases, please shutdown cleanly and\n"
                "run with --repair again.\n"
                "**************";
        } else if (storageGlobalParams.dur) {
            if (!dur::haveJournalFiles(/*anyFiles=*/true)) {
                // Passing anyFiles=true as we are trying to protect against starting in an
                // unclean state with the journal directory unmounted. If there are any files,
                // even prealloc files, then it means that it is mounted so we can continue.
                // Previously there was an issue (SERVER-5056) where we would fail to start up
                // if killed during prealloc.
                vector<string> dbnames;
                storageEngine->listDatabases(&dbnames);

                // No databases means that mongod crashed between initial startup and when
                // journaling was initialized, in which case it is safe to continue.
                if (!dbnames.empty()) {
                    errmsg = str::stream()
                        << "************** \n"
                        << "old lock file: " << name << ". probably means unclean shutdown,\n"
                        << "but there are no journal files to recover.\n"
                        << "this is likely human error or filesystem corruption.\n"
                        << "please make sure that your journal directory is mounted.\n"
                        << "found " << dbnames.size() << " dbs.\n"
                        << "see: http://dochub.mongodb.org/core/repair for more information\n"
                        << "*************";
                }
            }
        } else {
            // Journaling is disabled: an unclean shutdown can only be tolerated when the
            // user asked for a repair or there are journal files present.
            if (!dur::haveJournalFiles() && !doingRepair) {
                errmsg = str::stream() << "************** \n"
                                       << "Unclean shutdown detected.\n"
                                       << "Please visit http://dochub.mongodb.org/core/repair for "
                                          "recovery instructions.\n"
                                       << "*************";
            }
        }

        if (!errmsg.empty()) {
            log() << errmsg << endl;
            // Unconditional failure; uasserted() states that directly instead of asserting
            // on a constant false expression.
            uasserted(12596, "old lock file");
        }
    }

    // Not related to lock file, but this is where we handle unclean shutdown
    if (!storageGlobalParams.dur && dur::haveJournalFiles()) {
        log() << "**************" << endl;
        log() << "Error: journal files are present in journal directory, yet starting without "
                 "journaling enabled." << endl;
        log() << "It is recommended that you start with journaling enabled so that recovery may "
                 "occur." << endl;
        log() << "**************" << endl;
        uasserted(13597, "can't start without --journal enabled when journal/ files are present");
    }
}
#else
// Solaris (__sunos__) build of the startup safety check. None of the parameters are used by
// this variant; it only refuses to start (user assertion 13618) when journal files exist but
// journaling is disabled.
void acquirePathLock(MMAPV1Engine* storageEngine,
                     bool doingRepair,
                     const StorageEngineLockFile& lockFile) {
    // TODO - it is very bad that the lock-file checks performed by the other build of this
    // function are not run here.

    // Not related to lock file, but this is where we handle unclean shutdown.
    // Nothing to complain about when journaling is on or there are no journal files.
    if (storageGlobalParams.dur || !dur::haveJournalFiles()) {
        return;
    }

    log() << "**************" << endl;
    log() << "Error: journal files are present in journal directory, yet starting without "
             "--journal enabled."
          << endl;
    log() << "It is recommended that you start with journaling enabled so that recovery may "
             "occur."
          << endl;
    log() << "Alternatively (not recommended), you can backup everything, then delete the "
             "journal files, and run --repair"
          << endl;
    log() << "**************" << endl;
    uasserted(13618, "can't start without --journal enabled when journal/ files are present");
}
#endif // !defined(__sunos__)
/// Warn if readahead > 256KB (gridfs chunk size).
///
/// Linux only; on other platforms this is a no-op. Looks up the read_ahead_kb setting under
/// /sys/dev/block for the device backing 'dir' and emits startup warnings when it exceeds
/// 256. Any std::exception raised while probing is logged as a startup warning rather than
/// propagated.
void checkReadAhead(const string& dir) {
#ifdef __linux__
    try {
        const dev_t dev = getPartition(dir);

        // This path handles the case where the filesystem uses the whole device (including LVM)
        string path = str::stream() << "/sys/dev/block/" << major(dev) << ':' << minor(dev)
                                    << "/queue/read_ahead_kb";

        if (!boost::filesystem::exists(path)) {
            // This path handles the case where the filesystem is on a partition.
            path =
                str::stream() << "/sys/dev/block/" << major(dev) << ':'
                              << minor(dev)  // this is a symlink
                              << "/.."       // parent directory of a partition is for the whole device
                              << "/queue/read_ahead_kb";
        }

        if (boost::filesystem::exists(path)) {
            ifstream file(path.c_str());
            if (file.is_open()) {
                // Initialized so that a failed extraction can never leave an indeterminate
                // value to be compared below.
                int kb = 0;
                file >> kb;
                if (kb > 256) {
                    log() << startupWarningsLog;

                    log() << "** WARNING: Readahead for " << dir << " is set to " << kb << "KB"
                          << startupWarningsLog;

                    log() << "** We suggest setting it to 256KB (512 sectors) or less"
                          << startupWarningsLog;

                    log() << "** http://dochub.mongodb.org/core/readahead"
                          << startupWarningsLog;
                }
            }
        }
    } catch (const std::exception& e) {
        log() << "unable to validate readahead settings due to error: " << e.what()
              << startupWarningsLog;
        log() << "for more information, see http://dochub.mongodb.org/core/readahead"
              << startupWarningsLog;
    }
#endif  // __linux__
}
// This is unrelated to the _tmp directory in dbpath.
void clearTmpFiles() {
boost::filesystem::path path(storageGlobalParams.dbpath);
for (boost::filesystem::directory_iterator i(path);
i != boost::filesystem::directory_iterator();
++i) {
string fileName = boost::filesystem::path(*i).leaf().string();
if (boost::filesystem::is_directory(*i) && fileName.length() && fileName[0] == '$')
boost::filesystem::remove_all(*i);
}
}
} // namespace
// Constructs the engine and runs the startup checks, in this order:
//   1. warn about the readahead setting of the dbpath,
//   2. validate the lock file / journal state (may throw a user assertion),
//   3. start the file allocator,
//   4. remove '$'-prefixed directories from the dbpath.
MMAPV1Engine::MMAPV1Engine(const StorageEngineLockFile& lockFile) {
    // TODO check non-journal subdirs if using directory-per-db
    checkReadAhead(storageGlobalParams.dbpath);

    const bool repairRequested = storageGlobalParams.repair;
    acquirePathLock(this, repairRequested, lockFile);

    FileAllocator::get()->start();

    MONGO_ASSERT_ON_EXCEPTION_WITH_MSG(clearTmpFiles(), "clear tmp files");
}
// Completes engine startup: kicks off dataFileSync and then starts the durability subsystem.
void MMAPV1Engine::finishInit() {
// Start the dataFileSync task before durability is brought up.
dataFileSync.go();
// Replays the journal (if needed) and starts the background thread. This requires the
// ability to create OperationContexts.
dur::startup();
}
// Destroys every database catalog entry held in _entryMap and then empties the map.
MMAPV1Engine::~MMAPV1Engine() {
    EntryMap::const_iterator cur = _entryMap.begin();
    const EntryMap::const_iterator last = _entryMap.end();
    while (cur != last) {
        delete cur->second;
        ++cur;
    }
    _entryMap.clear();
}
// Allocates a new DurRecoveryUnit with 'new' and returns it as a raw pointer.
RecoveryUnit* MMAPV1Engine::newRecoveryUnit() {
    RecoveryUnit* const unit = new DurRecoveryUnit();
    return unit;
}
void MMAPV1Engine::listDatabases(std::vector* out) const {
_listDatabases(storageGlobalParams.dbpath, out);
}
// Returns the catalog entry for database 'db', creating and registering it on first use.
//
// Lookup and registration each happen under _entryMapMutex; construction of a new entry
// deliberately happens with the mutex released.
DatabaseCatalogEntry* MMAPV1Engine::getDatabaseCatalogEntry(OperationContext* opCtx,
                                                            const StringData& db) {
    const std::string dbName = db.toString();

    // Fast path: the entry already exists.
    {
        boost::mutex::scoped_lock lk(_entryMapMutex);
        const EntryMap::const_iterator found = _entryMap.find(dbName);
        if (found != _entryMap.end()) {
            return found->second;
        }
    }

    // This is an on-demand database create/open. At this point, we are locked under X lock for
    // the database (MMAPV1DatabaseCatalogEntry's constructor checks that) so no two threads
    // can be creating the same database concurrently. We need to create the database outside of
    // the _entryMapMutex so we do not deadlock (see SERVER-15880).
    MMAPV1DatabaseCatalogEntry* const created = new MMAPV1DatabaseCatalogEntry(
        opCtx, db, storageGlobalParams.dbpath, storageGlobalParams.directoryperdb, false);

    boost::mutex::scoped_lock lk(_entryMapMutex);

    // Sanity check that we are not overwriting something
    const bool inserted = _entryMap.insert(EntryMap::value_type(dbName, created)).second;
    invariant(inserted);

    return created;
}
// Closes database 'db': flushes outstanding journaled changes, then destroys and unregisters
// its catalog entry (if one is registered). Always returns Status::OK().
Status MMAPV1Engine::closeDatabase(OperationContext* txn, const StringData& db) {
    // Before the files are closed, flush any potentially outstanding changes, which might
    // reference this database. Otherwise we will assert when subsequent applications of the
    // global journal entries occur, which happen to have write intents for the removed files.
    getDur().syncDataAndTruncateJournal(txn);

    boost::mutex::scoped_lock lk(_entryMapMutex);

    // Single lookup; unlike operator[], find() does not insert a placeholder entry when the
    // database is not registered.
    const EntryMap::iterator it = _entryMap.find(db.toString());
    if (it != _entryMap.end()) {
        delete it->second;
        _entryMap.erase(it);
    }

    return Status::OK();
}
// Drops database 'db': closes it and, if that succeeded, deletes its data files.
// Returns the failure from closeDatabase() unchanged, otherwise Status::OK().
Status MMAPV1Engine::dropDatabase(OperationContext* txn, const StringData& db) {
    const Status closeStatus = closeDatabase(txn, db);
    if (closeStatus.isOK()) {
        _deleteDataFiles(db.toString());
        return Status::OK();
    }
    return closeStatus;
}
void MMAPV1Engine::_listDatabases(const std::string& directory, std::vector* out) {
boost::filesystem::path path(directory);
for (boost::filesystem::directory_iterator i(path);
i != boost::filesystem::directory_iterator();
++i) {
if (storageGlobalParams.directoryperdb) {
boost::filesystem::path p = *i;
string dbName = p.leaf().string();
p /= (dbName + ".ns");
if (exists(p))
out->push_back(dbName);
} else {
string fileName = boost::filesystem::path(*i).leaf().string();
if (fileName.length() > 3 && fileName.substr(fileName.length() - 3, 3) == ".ns")
out->push_back(fileName.substr(0, fileName.length() - 3));
}
}
}
// Forwards to MongoFile::flushAll(sync) and returns its result.
int MMAPV1Engine::flushAllFiles(bool sync) {
    const int result = MongoFile::flushAll(sync);
    return result;
}
// Reports whatever the durability interface (getDur()) says about being durable.
bool MMAPV1Engine::isDurable() const {
    const bool durable = getDur().isDurable();
    return durable;
}
// Returns a reference to this engine's _recordAccessTracker member.
RecordAccessTracker& MMAPV1Engine::getRecordAccessTracker() {
    return this->_recordAccessTracker;
}
void MMAPV1Engine::cleanShutdown() {
// wait until file preallocation finishes
// we would only hang here if the file_allocator code generates a
// synchronous signal, which we don't expect
log() << "shutdown: waiting for fs preallocator..." << endl;
FileAllocator::get()->waitUntilFinished();
if (storageGlobalParams.dur) {
log() << "shutdown: final commit..." << endl;
getDur().commitAndStopDurThread();
}
log() << "shutdown: closing all files..." << endl;
stringstream ss3;
MemoryMappedFile::closeAllFiles(ss3);
log() << ss3.str() << endl;
}
}