diff options
author | Eliot Horowitz <eliot@10gen.com> | 2013-07-18 09:35:32 -0400 |
---|---|---|
committer | Eliot Horowitz <eliot@10gen.com> | 2013-07-18 09:35:32 -0400 |
commit | c95d4bc5744dc6aaa9abf202ebee0bee2ffa5463 (patch) | |
tree | 425674c15ddd5343ba88c0f612e2c2603ce65fd9 /src/mongo | |
parent | a2960637041c11bc1d8b925e5a99975b7fe9485e (diff) | |
download | mongo-c95d4bc5744dc6aaa9abf202ebee0bee2ffa5463.tar.gz |
SERVER-6405: lots of prep work for Collection class
splitting pdfile into more components
added ExtentManager which should be the only way to do DiskLoc -> Record
Diffstat (limited to 'src/mongo')
52 files changed, 1330 insertions, 983 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript index a5f4ea198d1..eb028dd5eaf 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -426,7 +426,7 @@ serverOnlyFiles = [ "db/curop.cpp", "util/alignedbuilder.cpp", "util/elapsed_tracker.cpp", "util/touch_pages.cpp", - "db/mongommf.cpp", + "db/storage/durable_mapped_file.cpp", "db/dur.cpp", "db/durop.cpp", "db/dur_writetodatafiles.cpp", @@ -493,8 +493,12 @@ serverOnlyFiles = [ "db/curop.cpp", "db/instance.cpp", "db/client.cpp", "db/database.cpp", + "db/database_holder.cpp", "db/background.cpp", "db/pdfile.cpp", + "db/storage/data_file.cpp", + "db/storage/extent.cpp", + "db/storage/extent_manager.cpp", "db/cursor.cpp", "db/query_optimizer.cpp", "db/query_optimizer_internal.cpp", diff --git a/src/mongo/db/commands/storage_details.cpp b/src/mongo/db/commands/storage_details.cpp index 4ea51f0ebd9..71bfd747b8d 100644 --- a/src/mongo/db/commands/storage_details.cpp +++ b/src/mongo/db/commands/storage_details.cpp @@ -543,8 +543,11 @@ namespace { recordsArrayBuilder.reset(new BSONArrayBuilder(result.subarrayStart("records"))); } + Database* db = cc().database(); + ExtentManager& extentManager = db->getExtentManager(); + DiskLoc prevDl = ex->firstRecord; - for (DiskLoc dl = ex->firstRecord; ! 
dl.isNull(); dl = r->nextInExtent(dl)) { + for (DiskLoc dl = ex->firstRecord; !dl.isNull(); dl = extentManager.getNextRecordInExtent(dl)) { r = dl.rec(); processRecord(dl, prevDl, r, extentOfs, params, sliceData, recordsArrayBuilder.get()); diff --git a/src/mongo/db/compact.cpp b/src/mongo/db/compact.cpp index 77cfd7a2413..8f7d1ac0267 100644 --- a/src/mongo/db/compact.cpp +++ b/src/mongo/db/compact.cpp @@ -69,12 +69,14 @@ namespace mongo { verify( e->validates(diskloc) ); unsigned skipped = 0; + Database* db = cc().database(); + { // the next/prev pointers within the extent might not be in order so we first page the whole thing in // sequentially log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl; Timer t; - MongoDataFile* mdf = cc().database()->getFile( diskloc.a() ); + DataFile* mdf = db->getFile( diskloc.a() ); HANDLE fd = mdf->getFd(); int offset = diskloc.getOfs(); Extent* ext = diskloc.ext(); @@ -94,7 +96,7 @@ namespace mongo { if( !L.isNull() ) { while( 1 ) { Record *recOld = L.rec(); - L = recOld->nextInExtent(L); + L = db->getExtentManager().getNextRecordInExtent(L); BSONObj objOld = BSONObj::make(recOld); if( !validate || objOld.valid() ) { diff --git a/src/mongo/db/database.cpp b/src/mongo/db/database.cpp index 2f69f217e24..2fc3dde8a24 100644 --- a/src/mongo/db/database.cpp +++ b/src/mongo/db/database.cpp @@ -24,74 +24,74 @@ #include "mongo/db/auth/auth_index_d.h" #include "mongo/db/clientcursor.h" -#include "mongo/db/databaseholder.h" +#include "mongo/db/database_holder.h" #include "mongo/db/instance.h" #include "mongo/db/introspect.h" #include "mongo/db/pdfile.h" namespace mongo { - void assertDbAtLeastReadLocked(const Database *db) { - if( db ) { - Lock::assertAtLeastReadLocked(db->name()); - } - else { - verify( Lock::isLocked() ); - } - } - - void assertDbWriteLocked(const Database *db) { - if( db ) { - Lock::assertWriteLocked(db->name()); - } - else { - verify( Lock::isW() ); - } - } Database::~Database() { verify( Lock::isW() 
); _magic = 0; - size_t n = _files.size(); - for ( size_t i = 0; i < n; i++ ) - delete _files[i]; if( _ccByLoc.size() ) { log() << "\n\n\nWARNING: ccByLoc not empty on database close! " << _ccByLoc.size() << ' ' << _name << endl; } } + Status Database::validateDBName( const StringData& dbname ) { + + if ( dbname.size() <= 0 ) + return Status( ErrorCodes::BadValue, "db name is empty" ); + + if ( dbname.size() >= 64 ) + return Status( ErrorCodes::BadValue, "db name is too long" ); + + if ( dbname.find( '.' ) != string::npos ) + return Status( ErrorCodes::BadValue, "db name cannot contain a ." ); + + if ( dbname.find( ' ' ) != string::npos ) + return Status( ErrorCodes::BadValue, "db name cannot contain a space" ); + +#ifdef _WIN32 + static const char* windowsReservedNames[] = { + "con", "prn", "aux", "nul", + "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", + "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" + }; + + string lower = toLowerString( dbname.toString() ); + for ( size_t i = 0; i < (sizeof(windowsReservedNames) / sizeof(char*)); ++i ) { + if ( lower == windowsReservedNames[i] ) { + stringstream errorString; + errorString << "db name \"" << dbname << "\" is a reserved name"; + return Status( ErrorCodes::BadValue, errorString.str() ); + } + } +#endif + + return Status::OK(); + } + Database::Database(const char *nm, bool& newDb, const string& path ) - : _name(nm), _path(path), _namespaceIndex( _path, _name ), + : _name(nm), _path(path), + _namespaceIndex( _path, _name ), + _extentManager( _name, _path ), + _profileName(_name + ".system.profile") { + Status status = validateDBName( _name ); + if ( !status.isOK() ) { + warning() << "tried to open invalid db: " << _name << endl; + uasserted( 10028, status.toString() ); + } + try { - { - // check db name is valid - size_t L = strlen(nm); - uassert( 10028 , "db name is empty", L > 0 ); - uassert( 10032 , "db name too long", L < 64 ); - uassert( 10029 , "bad db name [1]", *nm
!= '.' ); - uassert( 10030 , "bad db name [2]", nm[L-1] != '.' ); - uassert( 10031 , "bad char(s) in db name", strchr(nm, ' ') == 0 ); -#ifdef _WIN32 - static const char* windowsReservedNames[] = { - "con", "prn", "aux", "nul", - "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", - "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" - }; - for ( size_t i = 0; i < (sizeof(windowsReservedNames) / sizeof(char*)); ++i ) { - if ( strcasecmp( nm, windowsReservedNames[i] ) == 0 ) { - stringstream errorString; - errorString << "db name \"" << nm << "\" is a reserved name"; - uassert( 16185 , errorString.str(), false ); - } - } -#endif - } newDb = _namespaceIndex.exists(); _profile = cmdLine.defaultProfile; checkDuplicateUncasedNames(true); + // If already exists, open. Otherwise behave as if empty until // there's a write, then open. if (!newDb) { @@ -109,11 +109,7 @@ namespace mongo { else { log() << e.what() << endl; } - // since destructor won't be called: - for ( size_t i = 0; i < _files.size(); i++ ) { - delete _files[i]; - } - _files.clear(); + _extentManager.reset(); throw; } } @@ -163,17 +159,7 @@ namespace mongo { return ""; } - boost::filesystem::path Database::fileName( int n ) const { - stringstream ss; - ss << _name << '.' << n; - boost::filesystem::path fullName; - fullName = boost::filesystem::path(_path); - if ( directoryperdb ) - fullName /= _name; - fullName /= ss.str(); - return fullName; - } - + /* bool Database::openExistingFile( int n ) { verify(this); Lock::assertWriteLocked(_name); @@ -202,7 +188,7 @@ namespace mongo { { boost::filesystem::path fullName = fileName( n ); string fullNameString = fullName.string(); - MongoDataFile *df = new MongoDataFile(n); + DataFile *df = new DataFile(n); try { if( !df->openExisting( fullNameString.c_str() ) ) { delete df; @@ -221,15 +207,15 @@ namespace mongo { return true; } - + */ // todo : we stop once a datafile dne. 
- // if one datafile were missing we should keep going for + // if one datafile were missing we should keep going for // repair purposes yet we do not. void Database::openAllFiles() { verify(this); - int n = 0; - while( openExistingFile(n) ) { - n++; + Status s = _extentManager.init(); + if ( !s.isOK() ) { + msgasserted( 16966, str::stream() << "_extentManager.init failed: " << s.toString() ); } } @@ -276,69 +262,6 @@ namespace mongo { } } - // todo: this is called a lot. streamline the common case - MongoDataFile* Database::getFile( int n, int sizeNeeded , bool preallocateOnly) { - verify(this); - DEV assertDbAtLeastReadLocked(this); - - _namespaceIndex.init(); - if ( n < 0 || n >= DiskLoc::MaxFiles ) { - out() << "getFile(): n=" << n << endl; - massert( 10295 , "getFile(): bad file number value (corrupt db?): run repair", false); - } - DEV { - if ( n > 100 ) { - out() << "getFile(): n=" << n << endl; - } - } - MongoDataFile* p = 0; - if ( !preallocateOnly ) { - while ( n >= (int) _files.size() ) { - verify(this); - if( !Lock::isWriteLocked(this->_name) ) { - log() << "error: getFile() called in a read lock, yet file to return is not yet open" << endl; - log() << " getFile(" << n << ") _files.size:" <<_files.size() << ' ' << fileName(n).string() << endl; - log() << " context ns: " << cc().ns() << endl; - verify(false); - } - _files.push_back(0); - } - p = _files[n]; - } - if ( p == 0 ) { - assertDbWriteLocked(this); - boost::filesystem::path fullName = fileName( n ); - string fullNameString = fullName.string(); - p = new MongoDataFile(n); - int minSize = 0; - if ( n != 0 && _files[ n - 1 ] ) - minSize = _files[ n - 1 ]->getHeader()->fileLength; - if ( sizeNeeded + DataFileHeader::HeaderSize > minSize ) - minSize = sizeNeeded + DataFileHeader::HeaderSize; - try { - p->open( fullNameString.c_str(), minSize, preallocateOnly ); - } - catch ( AssertionException& ) { - delete p; - throw; - } - if ( preallocateOnly ) - delete p; - else - _files[n] = p; - } - return 
preallocateOnly ? 0 : p; - } - - MongoDataFile* Database::addAFile( int sizeNeeded, bool preallocateNextFile ) { - assertDbWriteLocked(this); - int n = (int) _files.size(); - MongoDataFile *ret = getFile( n, sizeNeeded ); - if ( preallocateNextFile ) - preallocateAFile(); - return ret; - } - bool fileIndexExceedsQuota( const char *ns, int fileIndex, bool enforceQuota ) { return cmdLine.quota && @@ -349,12 +272,12 @@ namespace mongo { !NamespaceString::special( ns ) && nsToDatabaseSubstring( ns ) != "local"; } - - MongoDataFile* Database::suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ) { + + DataFile* Database::suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ) { // check existing files for ( int i=numFiles()-1; i>=0; i-- ) { - MongoDataFile* f = getFile( i ); + DataFile* f = getFile( i ); if ( f->getHeader()->unusedLength >= sizeNeeded ) { if ( fileIndexExceedsQuota( ns, i-1, enforceQuota ) ) // NOTE i-1 is the value used historically for this check. 
; @@ -375,12 +298,12 @@ namespace mongo { // allocate files until we either get one big enough or hit maxSize for ( int i = 0; i < 8; i++ ) { - MongoDataFile* f = addAFile( sizeNeeded, preallocate ); + DataFile* f = addAFile( sizeNeeded, preallocate ); if ( f->getHeader()->unusedLength >= sizeNeeded ) return f; - if ( f->getHeader()->fileLength >= MongoDataFile::maxSize() ) // this is as big as they get so might as well stop + if ( f->getHeader()->fileLength >= DataFile::maxSize() ) // this is as big as they get so might as well stop return f; } @@ -388,14 +311,6 @@ namespace mongo { return 0; } - MongoDataFile* Database::newestFile() { - int n = numFiles(); - if ( n == 0 ) - return 0; - return getFile(n-1); - } - - Extent* Database::allocExtent( const char *ns, int size, bool capped, bool enforceQuota ) { // todo: when profiling, these may be worth logging into profile collection bool fromFreeList = true; @@ -432,74 +347,5 @@ namespace mongo { return true; } - bool Database::exists(int n) const { - return boost::filesystem::exists( fileName( n ) ); - } - - int Database::numFiles() const { - DEV assertDbAtLeastReadLocked(this); - return (int) _files.size(); - } - - void Database::flushFiles( bool sync ) { - assertDbAtLeastReadLocked(this); - for( vector<MongoDataFile*>::iterator i = _files.begin(); i != _files.end(); i++ ) { - MongoDataFile *f = *i; - f->flush(sync); - } - } - - long long Database::fileSize() const { - long long size=0; - for (int n=0; exists(n); n++) - size += boost::filesystem::file_size( fileName(n) ); - return size; - } - - Database* DatabaseHolder::getOrCreate( const string& ns , const string& path , bool& justCreated ) { - string dbname = _todb( ns ); - { - SimpleMutex::scoped_lock lk(_m); - Lock::assertAtLeastReadLocked(ns); - DBs& m = _paths[path]; - { - DBs::iterator i = m.find(dbname); - if( i != m.end() ) { - justCreated = false; - return i->second; - } - } - - // todo: protect against getting sprayed with requests for different db names 
that DNE - - // that would make the DBs map very large. not clear what to do to handle though, - // perhaps just log it, which is what we do here with the "> 40" : - bool cant = !Lock::isWriteLocked(ns); - if( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) || m.size() > 40 || cant || DEBUG_BUILD ) { - log() << "opening db: " << (path==dbpath?"":path) << ' ' << dbname << endl; - } - massert(15927, "can't open database in a read lock. if db was just closed, consider retrying the query. might otherwise indicate an internal error", !cant); - } - - // we mark our thread as having done writes now as we do not want any exceptions - // once we start creating a new database - cc().writeHappened(); - - // this locks _m for defensive checks, so we don't want to be locked right here : - Database *db = new Database( dbname.c_str() , justCreated , path ); - - { - SimpleMutex::scoped_lock lk(_m); - DBs& m = _paths[path]; - verify( m[dbname] == 0 ); - m[dbname] = db; - _size++; - } - - authindex::configureSystemIndexes(dbname); - - db->clearTmpCollections(); - - return db; - } } // namespace mongo diff --git a/src/mongo/db/database.h b/src/mongo/db/database.h index 5f059b4438c..80b7415bf79 100644 --- a/src/mongo/db/database.h +++ b/src/mongo/db/database.h @@ -22,11 +22,12 @@ #include "mongo/db/cmdline.h" #include "mongo/db/namespace_details.h" #include "mongo/db/record.h" +#include "mongo/db/storage/extent_manager.h" namespace mongo { class Extent; - class MongoDataFile; + class DataFile; /** * Database represents a database database @@ -58,40 +59,38 @@ namespace mongo { /** * total file size of Database in bytes */ - long long fileSize() const; + long long fileSize() const { return _extentManager.fileSize(); } - int numFiles() const; - - /** - * returns file valid for file number n - */ - boost::filesystem::path fileName( int n ) const; + int numFiles() const { return _extentManager.numFiles(); } /** * return file n. 
if it doesn't exist, create it */ - MongoDataFile* getFile( int n, int sizeNeeded = 0, bool preallocateOnly = false ); + DataFile* getFile( int n, int sizeNeeded = 0, bool preallocateOnly = false ) { + _namespaceIndex.init(); + return _extentManager.getFile( n, sizeNeeded, preallocateOnly ); + } - MongoDataFile* addAFile( int sizeNeeded, bool preallocateNextFile ); + DataFile* addAFile( int sizeNeeded, bool preallocateNextFile ) { + return _extentManager.addAFile( sizeNeeded, preallocateNextFile ); + } /** * makes sure we have an extra file at the end that is empty * safe to call this multiple times - the implementation will only preallocate one file */ - void preallocateAFile() { getFile( numFiles() , 0, true ); } + void preallocateAFile() { _extentManager.preallocateAFile(); } - MongoDataFile* suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ); + DataFile* suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ); Extent* allocExtent( const char *ns, int size, bool capped, bool enforceQuota ); - MongoDataFile* newestFile(); - /** * @return true if success. false if bad level or error creating profile ns */ bool setProfilingLevel( int newLevel , string& errmsg ); - void flushFiles( bool sync ); + void flushFiles( bool sync ) { return _extentManager.flushFiles( sync ); } /** * @return true if ns is part of the database @@ -114,6 +113,9 @@ namespace mongo { const NamespaceIndex& namespaceIndex() const { return _namespaceIndex; } NamespaceIndex& namespaceIndex() { return _namespaceIndex; } + // TODO: do not think this method should exist, so should try and encapsulate better + ExtentManager& getExtentManager() { return _extentManager; } + /** * @return name of an existing database with same text name but different * casing, if one exists. Otherwise the empty string is returned. 
If @@ -121,6 +123,8 @@ namespace mongo { */ static string duplicateUncasedName( bool inholderlockalready, const string &name, const string &path, set< string > *duplicates = 0 ); + static Status validateDBName( const StringData& dbname ); + private: ~Database(); // closes files and other cleanup see below. @@ -131,7 +135,6 @@ namespace mongo { */ void checkDuplicateUncasedNames(bool inholderlockalready) const; - bool exists(int n) const; void openAllFiles(); /** @@ -144,12 +147,9 @@ namespace mongo { const string _name; // "alleyinsider" const string _path; // "/data/db" - // must be in the dbLock when touching this (and write locked when writing to of course) - // however during Database object construction we aren't, which is ok as it isn't yet visible - // to others and we are in the dbholder lock then. - vector<MongoDataFile*> _files; - NamespaceIndex _namespaceIndex; + ExtentManager _extentManager; + const string _profileName; // "alleyinsider.system.profile" CCByLoc _ccByLoc; // use by ClientCursor diff --git a/src/mongo/db/database_holder.cpp b/src/mongo/db/database_holder.cpp new file mode 100644 index 00000000000..1061e89ab7b --- /dev/null +++ b/src/mongo/db/database_holder.cpp @@ -0,0 +1,73 @@ +// database_holder.cpp + +/** +* Copyright (C) 2012 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "mongo/pch.h" + +#include "mongo/db/auth/auth_index_d.h" +#include "mongo/db/client.h" +#include "mongo/db/database_holder.h" + +namespace mongo { + + Database* DatabaseHolder::getOrCreate( const string& ns, const string& path, bool& justCreated ) { + string dbname = _todb( ns ); + { + SimpleMutex::scoped_lock lk(_m); + Lock::assertAtLeastReadLocked(ns); + DBs& m = _paths[path]; + { + DBs::iterator i = m.find(dbname); + if( i != m.end() ) { + justCreated = false; + return i->second; + } + } + + // todo: protect against getting sprayed with requests for different db names that DNE - + // that would make the DBs map very large. not clear what to do to handle though, + // perhaps just log it, which is what we do here with the "> 40" : + bool cant = !Lock::isWriteLocked(ns); + if( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) || + m.size() > 40 || cant || DEBUG_BUILD ) { + log() << "opening db: " << (path==dbpath?"":path) << ' ' << dbname << endl; + } + massert(15927, "can't open database in a read lock. if db was just closed, consider retrying the query. 
might otherwise indicate an internal error", !cant); + } + + // we mark our thread as having done writes now as we do not want any exceptions + // once we start creating a new database + cc().writeHappened(); + + // this locks _m for defensive checks, so we don't want to be locked right here : + Database *db = new Database( dbname.c_str() , justCreated , path ); + + { + SimpleMutex::scoped_lock lk(_m); + DBs& m = _paths[path]; + verify( m[dbname] == 0 ); + m[dbname] = db; + _size++; + } + + authindex::configureSystemIndexes(dbname); + + db->clearTmpCollections(); + + return db; + } +} diff --git a/src/mongo/db/databaseholder.h b/src/mongo/db/database_holder.h index fb25fc9d501..fb25fc9d501 100644 --- a/src/mongo/db/databaseholder.h +++ b/src/mongo/db/database_holder.h diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp index 8bd7efb37fb..b1ea2074ecc 100644 --- a/src/mongo/db/db.cpp +++ b/src/mongo/db/db.cpp @@ -338,7 +338,7 @@ namespace mongo { string dbName = *i; LOG(1) << "\t" << dbName << endl; Client::Context ctx( dbName ); - MongoDataFile *p = cc().database()->getFile( 0 ); + DataFile *p = cc().database()->getFile( 0 ); DataFileHeader *h = p->getHeader(); if ( !h->isCurrentVersion() || forceRepair ) { diff --git a/src/mongo/db/db.h b/src/mongo/db/db.h index 56c2f5acdd7..d0aaa4b1f44 100644 --- a/src/mongo/db/db.h +++ b/src/mongo/db/db.h @@ -20,7 +20,7 @@ #include "mongo/db/client.h" #include "mongo/db/curop.h" -#include "mongo/db/databaseholder.h" +#include "mongo/db/database_holder.h" #include "mongo/db/pdfile.h" #include "mongo/util/net/message.h" diff --git a/src/mongo/db/dbmessage.h b/src/mongo/db/dbmessage.h index f43e1de54d2..a8ee7a7638c 100644 --- a/src/mongo/db/dbmessage.h +++ b/src/mongo/db/dbmessage.h @@ -20,9 +20,9 @@ #include "mongo/bson/bson_validate.h" #include "mongo/client/constants.h" -#include "mongo/db/instance.h" #include "mongo/db/jsobj.h" #include "mongo/util/net/message.h" +#include "mongo/util/net/message_port.h" namespace mongo { @@ 
-129,9 +129,6 @@ namespace mongo { const char * getns() const { return data; } - void getns(Namespace& ns) const { - ns = data; - } const char * afterNS() const { return data + strlen( data ) + 1; @@ -267,6 +264,20 @@ namespace mongo { } }; + /** + * A response to a DbMessage. + */ + struct DbResponse { + Message *response; + MSGID responseTo; + string exhaustNS; /* points to ns if exhaust mode. 0=normal mode*/ + DbResponse(Message *r, MSGID rt) : response(r), responseTo(rt){ } + DbResponse() { + response = 0; + } + ~DbResponse() { delete response; } + }; + void replyToQuery(int queryResultFlags, AbstractMessagingPort* p, Message& requestMsg, void *data, int size, diff --git a/src/mongo/db/diskloc.h b/src/mongo/db/diskloc.h index 7c831413a8c..2a14edc609f 100644 --- a/src/mongo/db/diskloc.h +++ b/src/mongo/db/diskloc.h @@ -31,7 +31,7 @@ namespace mongo { class Record; class DeletedRecord; class Extent; - class MongoDataFile; + class DataFile; class DiskLoc; template< class Version > class BtreeBucket; @@ -164,7 +164,7 @@ namespace mongo { template< class V > BtreeBucket<V> * btreemod() const; - /*MongoDataFile& pdf() const;*/ + /*DataFile& pdf() const;*/ /// members for Sorter struct SorterDeserializeSettings {}; // unused diff --git a/src/mongo/db/dur.cpp b/src/mongo/db/dur.cpp index b4b288948f6..a7a2e4c8f72 100644 --- a/src/mongo/db/dur.cpp +++ b/src/mongo/db/dur.cpp @@ -343,7 +343,7 @@ namespace mongo { return; const WriteIntent &i = commitJob.lastWrite(); size_t ofs; - MongoMMF *mmf = privateViews.find(i.start(), ofs); + DurableMappedFile *mmf = privateViews.find(i.start(), ofs); if( mmf == 0 ) return; size_t past = ofs + i.length(); @@ -378,8 +378,8 @@ namespace mongo { public: validateSingleMapMatches(unsigned long long& bytes) :_bytes(bytes) {} void operator () (MongoFile *mf) { - if( mf->isMongoMMF() ) { - MongoMMF *mmf = (MongoMMF*) mf; + if( mf->isDurableMappedFile() ) { + DurableMappedFile *mmf = (DurableMappedFile*) mf; const unsigned char *p = (const 
unsigned char *) mmf->getView(); const unsigned char *w = (const unsigned char *) mmf->view_write(); @@ -515,8 +515,8 @@ namespace mongo { Timer t; for( unsigned x = 0; x < ntodo; x++ ) { dassert( i != e ); - if( (*i)->isMongoMMF() ) { - MongoMMF *mmf = (MongoMMF*) *i; + if( (*i)->isDurableMappedFile() ) { + DurableMappedFile *mmf = (DurableMappedFile*) *i; verify(mmf); if( mmf->willNeedRemap() ) { mmf->willNeedRemap() = false; @@ -692,7 +692,7 @@ namespace mongo { else { stats.curr->_commitsInWriteLock++; // however, if we are already write locked, we must do it now -- up the call tree someone - // may do a write without a new lock acquisition. this can happen when MongoMMF::close() calls + // may do a write without a new lock acquisition. this can happen when DurableMappedFile::close() calls // this method when a file (and its views) is about to go away. // REMAPPRIVATEVIEW(); @@ -704,7 +704,7 @@ namespace mongo { @param lwg set if the durcommitthread *only* -- then we will upgrade the lock to W so we can remapprivateview. 
only durcommitthread as more than one thread upgrading would potentially deadlock - @see MongoMMF::close() + @see DurableMappedFile::close() */ static void groupCommit(Lock::GlobalWrite *lgw) { try { @@ -751,7 +751,7 @@ namespace mongo { groupCommit(&w); } - /** called when a MongoMMF is closing -- we need to go ahead and group commit in that case before its + /** called when a DurableMappedFile is closing -- we need to go ahead and group commit in that case before its views disappear */ void closingFileNotification() { diff --git a/src/mongo/db/dur.h b/src/mongo/db/dur.h index e7d9e9a74eb..6a6629ffb14 100644 --- a/src/mongo/db/dur.h +++ b/src/mongo/db/dur.h @@ -19,7 +19,7 @@ #pragma once #include "mongo/db/diskloc.h" -#include "mongo/db/mongommf.h" +#include "mongo/db/storage/durable_mapped_file.h" namespace mongo { @@ -135,7 +135,7 @@ namespace mongo { inline T* alreadyDeclared(T *x) { #if defined(_TESTINTENT) - return (T*) MongoMMF::switchToPrivateView(x); + return (T*) DurableMappedFile::switchToPrivateView(x); #else return x; #endif diff --git a/src/mongo/db/dur_commitjob.cpp b/src/mongo/db/dur_commitjob.cpp index a4dd06e832b..f08841a8023 100644 --- a/src/mongo/db/dur_commitjob.cpp +++ b/src/mongo/db/dur_commitjob.cpp @@ -222,7 +222,7 @@ namespace mongo { static int n; if( ++n < 10000 ) { size_t ofs; - MongoMMF *mmf = privateViews._find(w.p, ofs); + DurableMappedFile *mmf = privateViews._find(w.p, ofs); if( mmf ) { log() << "DEBUG note write intent " << w.p << ' ' << mmf->filename() << " ofs:" << hex << ofs << " len:" << w.len << endl; } diff --git a/src/mongo/db/dur_preplogbuffer.cpp b/src/mongo/db/dur_preplogbuffer.cpp index dbdc4b47815..365047cea63 100644 --- a/src/mongo/db/dur_preplogbuffer.cpp +++ b/src/mongo/db/dur_preplogbuffer.cpp @@ -48,8 +48,8 @@ namespace mongo { RelativePath local = RelativePath::fromRelativePath("local"); - static MongoMMF* findMMF_inlock(void *ptr, size_t &ofs) { - MongoMMF *f = privateViews.find_inlock(ptr, ofs); + static 
DurableMappedFile* findMMF_inlock(void *ptr, size_t &ofs) { + DurableMappedFile *f = privateViews.find_inlock(ptr, ofs); if( f == 0 ) { error() << "findMMF_inlock failed " << privateViews.numberOfViews_inlock() << endl; printStackTrace(); // we want a stack trace and the assert below didn't print a trace once in the real world - not sure why @@ -63,7 +63,7 @@ namespace mongo { /** put the basic write operation into the buffer (bb) to be journaled */ static void prepBasicWrite_inlock(AlignedBuilder&bb, const WriteIntent *i, RelativePath& lastDbPath) { size_t ofs = 1; - MongoMMF *mmf = findMMF_inlock(i->start(), /*out*/ofs); + DurableMappedFile *mmf = findMMF_inlock(i->start(), /*out*/ofs); if( unlikely(!mmf->willNeedRemap()) ) { // tag this mmf as needed a remap of its private view later. @@ -73,7 +73,7 @@ namespace mongo { } // since we have already looked up the mmf, we go ahead and remember the write view location - // so we don't have to find the MongoMMF again later in WRITETODATAFILES() + // so we don't have to find the DurableMappedFile again later in WRITETODATAFILES() // // this was for WRITETODATAFILES_Impl2 so commented out now // diff --git a/src/mongo/db/dur_recover.cpp b/src/mongo/db/dur_recover.cpp index 94a86b78dc3..c3feb296a58 100644 --- a/src/mongo/db/dur_recover.cpp +++ b/src/mongo/db/dur_recover.cpp @@ -35,7 +35,7 @@ #include "mongo/db/dur_stats.h" #include "mongo/db/durop.h" #include "mongo/db/kill_current_op.h" -#include "mongo/db/mongommf.h" +#include "mongo/db/storage/durable_mapped_file.h" #include "mongo/db/pdfile.h" #include "mongo/util/bufreader.h" #include "mongo/util/checksum.h" @@ -221,7 +221,7 @@ namespace mongo { LockMongoFilesShared::assertAtLeastReadLocked(); } - MongoMMF* RecoveryJob::Last::newEntry(const dur::ParsedJournalEntry& entry, RecoveryJob& rj) { + DurableMappedFile* RecoveryJob::Last::newEntry(const dur::ParsedJournalEntry& entry, RecoveryJob& rj) { int num = entry.e->getFileNo(); if( num == fileNo && entry.dbName == 
dbName ) return mmf; @@ -229,20 +229,20 @@ namespace mongo { string fn = fileName(entry.dbName, num); MongoFile *file; { - MongoFileFinder finder; // must release lock before creating new MongoMMF + MongoFileFinder finder; // must release lock before creating new DurableMappedFile file = finder.findByPath(fn); } if (file) { - verify(file->isMongoMMF()); - mmf = (MongoMMF*)file; + verify(file->isDurableMappedFile()); + mmf = (DurableMappedFile*)file; } else { if( !rj._recovering ) { log() << "journal error applying writes, file " << fn << " is not open" << endl; verify(false); } - boost::shared_ptr<MongoMMF> sp (new MongoMMF); + boost::shared_ptr<DurableMappedFile> sp (new DurableMappedFile); verify(sp->open(fn, false)); rj._mmfs.push_back(sp); mmf = sp.get(); @@ -260,7 +260,7 @@ namespace mongo { verify(entry.dbName); verify((size_t)strnlen(entry.dbName, MaxDatabaseNameLen) < MaxDatabaseNameLen); - MongoMMF *mmf = last.newEntry(entry, *this); + DurableMappedFile *mmf = last.newEntry(entry, *this); if ((entry.e->ofs + entry.e->len) <= mmf->length()) { verify(mmf->view_write()); @@ -306,28 +306,28 @@ namespace mongo { } } - MongoMMF* RecoveryJob::getMongoMMF(const ParsedJournalEntry& entry) { + DurableMappedFile* RecoveryJob::getDurableMappedFile(const ParsedJournalEntry& entry) { verify(entry.dbName); verify((size_t)strnlen(entry.dbName, MaxDatabaseNameLen) < MaxDatabaseNameLen); const string fn = fileName(entry.dbName, entry.e->getFileNo()); MongoFile* file; { - MongoFileFinder finder; // must release lock before creating new MongoMMF + MongoFileFinder finder; // must release lock before creating new DurableMappedFile file = finder.findByPath(fn); } - MongoMMF* mmf; + DurableMappedFile* mmf; if (file) { - verify(file->isMongoMMF()); - mmf = (MongoMMF*)file; + verify(file->isDurableMappedFile()); + mmf = (DurableMappedFile*)file; } else { if( !_recovering ) { log() << "journal error applying writes, file " << fn << " is not open" << endl; verify(false); } - 
boost::shared_ptr<MongoMMF> sp (new MongoMMF); + boost::shared_ptr<DurableMappedFile> sp (new DurableMappedFile); verify(sp->open(fn, false)); _mmfs.push_back(sp); mmf = sp.get(); @@ -568,7 +568,7 @@ namespace mongo { // at this point in the program so it wouldn't have been a true problem (I think) // can't lock groupCommitMutex here as - // MongoMMF::close()->closingFileNotication()->groupCommit() will lock it + // DurableMappedFile::close()->closingFileNotication()->groupCommit() will lock it // and that would be recursive. // // SimpleMutex::scoped_lock lk2(commitJob.groupCommitMutex); diff --git a/src/mongo/db/dur_recover.h b/src/mongo/db/dur_recover.h index 4cb36eec9aa..5f84d7205f2 100644 --- a/src/mongo/db/dur_recover.h +++ b/src/mongo/db/dur_recover.h @@ -26,7 +26,7 @@ #include "mongo/util/file.h" namespace mongo { - class MongoMMF; + class DurableMappedFile; namespace dur { struct ParsedJournalEntry; @@ -37,9 +37,9 @@ namespace mongo { static class Last { public: Last(); - MongoMMF* newEntry(const ParsedJournalEntry&, RecoveryJob&); + DurableMappedFile* newEntry(const ParsedJournalEntry&, RecoveryJob&); private: - MongoMMF *mmf; + DurableMappedFile *mmf; string dbName; int fileNo; } last; @@ -62,9 +62,9 @@ namespace mongo { bool processFileBuffer(const void *, unsigned len); bool processFile(boost::filesystem::path journalfile); void _close(); // doesn't lock - MongoMMF* getMongoMMF(const ParsedJournalEntry& entry); + DurableMappedFile* getDurableMappedFile(const ParsedJournalEntry& entry); - list<boost::shared_ptr<MongoMMF> > _mmfs; + list<boost::shared_ptr<DurableMappedFile> > _mmfs; unsigned long long _lastDataSyncedFromLastRun; unsigned long long _lastSeqMentionedInConsoleLog; diff --git a/src/mongo/db/dur_writetodatafiles.cpp b/src/mongo/db/dur_writetodatafiles.cpp index a6db4a0fab6..c924dc65e94 100644 --- a/src/mongo/db/dur_writetodatafiles.cpp +++ b/src/mongo/db/dur_writetodatafiles.cpp @@ -79,7 +79,7 @@ namespace mongo { - we couldn't do it from the 
private views then as they may be changing. would have to then be from the journal alignedbuffer. - we need to be careful the file isn't unmapped on us -- perhaps a mutex or something - with MongoMMF on closes or something to coordinate that. + with DurableMappedFile on closes or something to coordinate that. concurrency: in mmmutex, not necessarily in dbMutex diff --git a/src/mongo/db/durop.cpp b/src/mongo/db/durop.cpp index 23b8db15dd9..9af8aef11ea 100644 --- a/src/mongo/db/durop.cpp +++ b/src/mongo/db/durop.cpp @@ -21,7 +21,7 @@ #include "mongo/db/durop.h" #include "mongo/db/d_concurrency.h" -#include "mongo/db/mongommf.h" +#include "mongo/db/storage/durable_mapped_file.h" #include "mongo/util/alignedbuilder.h" #include "mongo/util/file.h" #include "mongo/util/file_allocator.h" diff --git a/src/mongo/db/fts/fts_command_mongos.cpp b/src/mongo/db/fts/fts_command_mongos.cpp index 8a2e52c5671..f9e11bfcffe 100644 --- a/src/mongo/db/fts/fts_command_mongos.cpp +++ b/src/mongo/db/fts/fts_command_mongos.cpp @@ -24,6 +24,7 @@ #include "mongo/db/fts/fts_command.h" #include "mongo/s/strategy.h" +#include "mongo/util/timer.h" namespace mongo { diff --git a/src/mongo/db/instance.h b/src/mongo/db/instance.h index 8dc744fa923..aeee8418ea7 100644 --- a/src/mongo/db/instance.h +++ b/src/mongo/db/instance.h @@ -23,6 +23,7 @@ #include "mongo/db/client.h" #include "mongo/db/cmdline.h" #include "mongo/db/curop-inl.h" +#include "mongo/db/dbmessage.h" namespace mongo { @@ -54,20 +55,6 @@ namespace mongo { extern DiagLog _diaglog; - /* we defer response until we unlock. don't want a blocked socket to - keep things locked. - */ - struct DbResponse { - Message *response; - MSGID responseTo; - string exhaustNS; /* points to ns if exhaust mode. 
0=normal mode*/ - DbResponse(Message *r, MSGID rt) : response(r), responseTo(rt){ } - DbResponse() { - response = 0; - } - ~DbResponse() { delete response; } - }; - void assembleResponse( Message &m, DbResponse &dbresponse, const HostAndPort &client ); void getDatabaseNames( vector< string > &names , const string& usePath = dbpath ); diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp index 139821afd3f..d59a7e097aa 100644 --- a/src/mongo/db/introspect.cpp +++ b/src/mongo/db/introspect.cpp @@ -23,7 +23,7 @@ #include "mongo/db/auth/authorization_session.h" #include "mongo/db/auth/principal_set.h" #include "mongo/db/curop.h" -#include "mongo/db/databaseholder.h" +#include "mongo/db/database_holder.h" #include "mongo/db/introspect.h" #include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" diff --git a/src/mongo/db/namespace_details.cpp b/src/mongo/db/namespace_details.cpp index 4e9f24a99c2..17cd7481db0 100644 --- a/src/mongo/db/namespace_details.cpp +++ b/src/mongo/db/namespace_details.cpp @@ -23,7 +23,7 @@ #include "mongo/db/db.h" #include "mongo/db/json.h" -#include "mongo/db/mongommf.h" +#include "mongo/db/storage/durable_mapped_file.h" #include "mongo/db/ops/delete.h" #include "mongo/db/ops/update.h" #include "mongo/db/pdfile.h" diff --git a/src/mongo/db/namespace_details.h b/src/mongo/db/namespace_details.h index 830c0e73532..965a0e13c2c 100644 --- a/src/mongo/db/namespace_details.h +++ b/src/mongo/db/namespace_details.h @@ -23,7 +23,7 @@ #include "mongo/db/index_names.h" #include "mongo/db/index_set.h" #include "mongo/db/jsobj.h" -#include "mongo/db/mongommf.h" +#include "mongo/db/storage/durable_mapped_file.h" #include "mongo/db/namespace_string.h" #include "mongo/db/querypattern.h" #include "mongo/db/storage/namespace.h" diff --git a/src/mongo/db/ops/query.cpp b/src/mongo/db/ops/query.cpp index 38381efcb98..5ba4c6c4b25 100644 --- a/src/mongo/db/ops/query.cpp +++ b/src/mongo/db/ops/query.cpp @@ -21,6 +21,7 @@ #include "mongo/db/ops/query.h" 
#include "mongo/bson/util/builder.h" +#include "mongo/client/dbclientinterface.h" #include "mongo/db/clientcursor.h" #include "mongo/db/commands.h" #include "mongo/db/pagefault.h" diff --git a/src/mongo/db/ops/query.h b/src/mongo/db/ops/query.h index 053025b13c7..bcb823f2dcb 100644 --- a/src/mongo/db/ops/query.h +++ b/src/mongo/db/ops/query.h @@ -31,6 +31,7 @@ namespace mongo { + class CurOp; class ParsedQuery; class QueryOptimizerCursor; struct QueryPlanSummary; diff --git a/src/mongo/db/parsed_query.cpp b/src/mongo/db/parsed_query.cpp index 2f07c847637..df087e29994 100644 --- a/src/mongo/db/parsed_query.cpp +++ b/src/mongo/db/parsed_query.cpp @@ -17,6 +17,7 @@ #include <cstring> // for strstr +#include "mongo/client/dbclientinterface.h" #include "mongo/db/dbmessage.h" #include "mongo/db/projection.h" #include "mongo/db/ops/query.h" diff --git a/src/mongo/db/pdfile.cpp b/src/mongo/db/pdfile.cpp index 1f623f3f3f2..2e0431c79a1 100644 --- a/src/mongo/db/pdfile.cpp +++ b/src/mongo/db/pdfile.cpp @@ -345,117 +345,6 @@ namespace mongo { /*---------------------------------------------------------------------*/ - int MongoDataFile::maxSize() { - if ( sizeof( int* ) == 4 ) { - return 512 * 1024 * 1024; - } - else if ( cmdLine.smallfiles ) { - return 0x7ff00000 >> 2; - } - else { - return 0x7ff00000; - } - } - - NOINLINE_DECL void MongoDataFile::badOfs2(int ofs) const { - stringstream ss; - ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; - uasserted(13441, ss.str()); - } - - NOINLINE_DECL void MongoDataFile::badOfs(int ofs) const { - stringstream ss; - ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; - uasserted(13440, ss.str()); - } - - int MongoDataFile::defaultSize( const char *filename ) const { - int size; - if ( fileNo <= 4 ) - size = (64*1024*1024) << fileNo; - else - size = 0x7ff00000; - if ( cmdLine.smallfiles ) { - size = size >> 2; - } - return size; - 
} - - static void check(void *_mb) { - if( sizeof(char *) == 4 ) - uassert( 10084 , "can't map file memory - mongo requires 64 bit build for larger datasets", _mb != 0); - else - uassert( 10085 , "can't map file memory", _mb != 0); - } - - /** @return true if found and opened. if uninitialized (prealloc only) does not open. */ - bool MongoDataFile::openExisting( const char *filename ) { - verify( _mb == 0 ); - if( !boost::filesystem::exists(filename) ) - return false; - if( !mmf.open(filename,false) ) { - MONGO_DLOG(2) << "info couldn't open " << filename << " probably end of datafile list" << endl; - return false; - } - _mb = mmf.getView(); verify(_mb); - unsigned long long sz = mmf.length(); - verify( sz <= 0x7fffffff ); - verify( sz % 4096 == 0 ); - if( sz < 64*1024*1024 && !cmdLine.smallfiles ) { - if( sz >= 16*1024*1024 && sz % (1024*1024) == 0 ) { - log() << "info openExisting file size " << sz << " but cmdLine.smallfiles=false: " - << filename << endl; - } - else { - log() << "openExisting size " << sz << " less then minimum file size expectation " - << filename << endl; - verify(false); - } - } - check(_mb); - if( header()->uninitialized() ) - return false; - return true; - } - - void MongoDataFile::open( const char *filename, int minSize, bool preallocateOnly ) { - long size = defaultSize( filename ); - while ( size < minSize ) { - if ( size < maxSize() / 2 ) - size *= 2; - else { - size = maxSize(); - break; - } - } - if ( size > maxSize() ) - size = maxSize(); - - verify( size >= 64*1024*1024 || cmdLine.smallfiles ); - verify( size % 4096 == 0 ); - - if ( preallocateOnly ) { - if ( cmdLine.prealloc ) { - FileAllocator::get()->requestAllocation( filename, size ); - } - return; - } - - { - verify( _mb == 0 ); - unsigned long long sz = size; - if( mmf.create(filename, sz, false) ) - _mb = mmf.getView(); - verify( sz <= 0x7fffffff ); - size = (int) sz; - } - check(_mb); - header()->init(fileNo, size, filename); - } - - void MongoDataFile::flush( bool sync ) 
{ - mmf.flush( sync ); - } void addNewExtentToNamespace(const char *ns, Extent *e, DiskLoc eloc, DiskLoc emptyLoc, bool capped) { NamespaceIndex *ni = nsindex(ns); @@ -478,49 +367,6 @@ namespace mongo { details->addDeletedRec(emptyLoc.drec(), emptyLoc); } - Extent* MongoDataFile::createExtent(const char *ns, int approxSize, bool newCapped, int loops) { - verify( approxSize <= Extent::maxSize() ); - { - // make sizes align with VM page size - int newSize = (approxSize + 0xfff) & 0xfffff000; - verify( newSize >= 0 ); - if( newSize < Extent::maxSize() ) - approxSize = newSize; - } - massert( 10357 , "shutdown in progress", ! inShutdown() ); - massert( 10358 , "bad new extent size", approxSize >= Extent::minSize() && approxSize <= Extent::maxSize() ); - massert( 10359 , "header==0 on new extent: 32 bit mmap space exceeded?", header() ); // null if file open failed - int ExtentSize = min(header()->unusedLength, approxSize); - DiskLoc loc; - if ( ExtentSize < Extent::minSize() ) { - /* note there could be a lot of looping here is db just started and - no files are open yet. we might want to do something about that. 
*/ - if ( loops > 8 ) { - verify( loops < 10000 ); - out() << "warning: loops=" << loops << " fileno:" << fileNo << ' ' << ns << '\n'; - } - log() << "newExtent: " << ns << " file " << fileNo << " full, adding a new file" << endl; - return cc().database()->addAFile( 0, true )->createExtent(ns, approxSize, newCapped, loops+1); - } - int offset = header()->unused.getOfs(); - - DataFileHeader *h = header(); - h->unused.writing().set( fileNo, offset + ExtentSize ); - getDur().writingInt(h->unusedLength) = h->unusedLength - ExtentSize; - loc.set(fileNo, offset); - Extent *e = _getExtent(loc); - DiskLoc emptyLoc = getDur().writing(e)->init(ns, ExtentSize, fileNo, offset, newCapped); - - addNewExtentToNamespace(ns, e, loc, emptyLoc, newCapped); - - DEV { - MONGO_TLOG(1) << "new extent " << ns << " size: 0x" << hex << ExtentSize << " loc: 0x" - << hex << offset << " emptyLoc:" << hex << emptyLoc.getOfs() << dec - << endl; - } - return e; - } - Extent* DataFileMgr::allocFromFreeList(const char *ns, int approxSize, bool capped) { string s = cc().database()->name() + FREELIST_NS; NamespaceDetails *f = nsdetails(s); @@ -612,17 +458,6 @@ namespace mongo { /*---------------------------------------------------------------------*/ - void Extent::markEmpty() { - xnext.Null(); - xprev.Null(); - firstRecord.Null(); - lastRecord.Null(); - } - - DiskLoc Extent::reuse(const char *nsname, bool capped) { - return getDur().writing(this)->_reuse(nsname, capped); - } - void getEmptyLoc(const char *ns, const DiskLoc extentLoc, int extentLength, bool capped, /*out*/DiskLoc& emptyLoc, /*out*/int& delRecLength) { emptyLoc = extentLoc; emptyLoc.inc( Extent::HeaderSize() ); @@ -637,155 +472,6 @@ namespace mongo { } } - DiskLoc Extent::_reuse(const char *nsname, bool capped) { - LOG(3) << "_reuse extent was:" << nsDiagnostic.toString() << " now:" << nsname << endl; - if (magic != extentSignature) { - StringBuilder sb; - sb << "bad extent signature " << integerToHex(magic) - << " for namespace '" << 
nsDiagnostic.toString() - << "' found in Extent::_reuse"; - msgasserted(10360, sb.str()); - } - nsDiagnostic = nsname; - markEmpty(); - - DiskLoc emptyLoc; - int delRecLength; - getEmptyLoc(nsname, myLoc, length, capped, emptyLoc, delRecLength); - - // todo: some dup code here and below in Extent::init - DeletedRecord* empty = getDur().writing(DataFileMgr::getDeletedRecord(emptyLoc)); - empty->lengthWithHeaders() = delRecLength; - empty->extentOfs() = myLoc.getOfs(); - empty->nextDeleted().Null(); - return emptyLoc; - } - - /* assumes already zeroed -- insufficient for block 'reuse' perhaps */ - DiskLoc Extent::init(const char *nsname, int _length, int _fileNo, int _offset, bool capped) { - magic = extentSignature; - myLoc.set(_fileNo, _offset); - xnext.Null(); - xprev.Null(); - nsDiagnostic = nsname; - length = _length; - firstRecord.Null(); - lastRecord.Null(); - - DiskLoc emptyLoc; - int delRecLength; - getEmptyLoc(nsname, myLoc, _length, capped, emptyLoc, delRecLength); - - DeletedRecord* empty = getDur().writing(DataFileMgr::getDeletedRecord(emptyLoc)); - empty->lengthWithHeaders() = delRecLength; - empty->extentOfs() = myLoc.getOfs(); - empty->nextDeleted().Null(); - return emptyLoc; - } - - bool Extent::validates(const DiskLoc diskLoc, BSONArrayBuilder* errors) { - bool extentOk = true; - if (magic != extentSignature) { - if (errors) { - StringBuilder sb; - sb << "bad extent signature " << integerToHex(magic) - << " in extent " << diskLoc.toString(); - *errors << sb.str(); - } - extentOk = false; - } - if (myLoc != diskLoc) { - if (errors) { - StringBuilder sb; - sb << "extent " << diskLoc.toString() - << " self-pointer is " << myLoc.toString(); - *errors << sb.str(); - } - extentOk = false; - } - if (firstRecord.isNull() != lastRecord.isNull()) { - if (errors) { - StringBuilder sb; - if (firstRecord.isNull()) { - sb << "in extent " << diskLoc.toString() - << ", firstRecord is null but lastRecord is " - << lastRecord.toString(); - } - else { - sb << "in 
extent " << diskLoc.toString() - << ", firstRecord is " << firstRecord.toString() - << " but lastRecord is null"; - } - *errors << sb.str(); - } - extentOk = false; - } - if (length < minSize()) { - if (errors) { - StringBuilder sb; - sb << "length of extent " << diskLoc.toString() - << " is " << length - << ", which is less than minimum length of " << minSize(); - *errors << sb.str(); - } - extentOk = false; - } - return extentOk; - } - -/* - Record* Extent::newRecord(int len) { - if( firstEmptyRegion.isNull() )8 - return 0; - - verify(len > 0); - int newRecSize = len + Record::HeaderSize; - DiskLoc newRecordLoc = firstEmptyRegion; - Record *r = getRecord(newRecordLoc); - int left = r->netLength() - len; - if( left < 0 ) { - // - firstEmptyRegion.Null(); - return 0; - } - - DiskLoc nextEmpty = r->next.getNextEmpty(firstEmptyRegion); - r->lengthWithHeaders = newRecSize; - r->next.markAsFirstOrLastInExtent(this); // we're now last in the extent - if( !lastRecord.isNull() ) { - verify(getRecord(lastRecord)->next.lastInExtent()); // it was the last one - getRecord(lastRecord)->next.set(newRecordLoc); // until now - r->prev.set(lastRecord); - } - else { - r->prev.markAsFirstOrLastInExtent(this); // we are the first in the extent - verify( firstRecord.isNull() ); - firstRecord = newRecordLoc; - } - lastRecord = newRecordLoc; - - if( left < Record::HeaderSize + 32 ) { - firstEmptyRegion.Null(); - } - else { - firstEmptyRegion.inc(newRecSize); - Record *empty = getRecord(firstEmptyRegion); - empty->next.set(nextEmpty); // not for empty records, unless in-use records, next and prev can be null. 
- empty->prev.Null(); - empty->lengthWithHeaders = left; - } - - return r; - } -*/ - - int Extent::maxSize() { - int maxExtentSize = 0x7ff00000; - if ( cmdLine.smallfiles ) { - maxExtentSize >>= 2; - } - return maxExtentSize; - } /*---------------------------------------------------------------------*/ diff --git a/src/mongo/db/pdfile.h b/src/mongo/db/pdfile.h index 68fd32d0f89..c2d651f77bd 100644 --- a/src/mongo/db/pdfile.h +++ b/src/mongo/db/pdfile.h @@ -26,10 +26,14 @@ #pragma once #include "mongo/db/client.h" +#include "mongo/db/cursor.h" +#include "mongo/db/database.h" #include "mongo/db/diskloc.h" #include "mongo/db/jsobjmanipulator.h" #include "mongo/db/memconcept.h" -#include "mongo/db/mongommf.h" +#include "mongo/db/storage/data_file.h" +#include "mongo/db/storage/durable_mapped_file.h" +#include "mongo/db/storage/extent.h" #include "mongo/db/namespace_details-inl.h" #include "mongo/db/namespace_string.h" #include "mongo/db/pdfile_version.h" @@ -37,6 +41,7 @@ #include "mongo/util/log.h" #include "mongo/util/mmap.h" + namespace mongo { class Cursor; @@ -61,54 +66,6 @@ namespace mongo { /*---------------------------------------------------------------------*/ - class MongoDataFile { - friend class DataFileMgr; - friend class BasicCursor; - public: - MongoDataFile(int fn) : _mb(0), fileNo(fn) { } - - /** @return true if found and opened. if uninitialized (prealloc only) does not open. */ - bool openExisting( const char *filename ); - - /** creates if DNE */ - void open(const char *filename, int requestedDataSize = 0, bool preallocateOnly = false); - - /* allocate a new extent from this datafile. 
- @param capped - true if capped collection - @param loops is our recursion check variable - you want to pass in zero - */ - Extent* createExtent(const char *ns, int approxSize, bool capped = false, int loops = 0); - - DataFileHeader *getHeader() { return header(); } - HANDLE getFd() { return mmf.getFd(); } - unsigned long long length() const { return mmf.length(); } - - /* return max size an extent may be */ - static int maxSize(); - - /** fsync */ - void flush( bool sync ); - - /** only use fore debugging */ - Extent* debug_getExtent(DiskLoc loc) { return _getExtent( loc ); } - private: - void badOfs(int) const; - void badOfs2(int) const; - int defaultSize( const char *filename ) const; - - Extent* getExtent(DiskLoc loc) const; - Extent* _getExtent(DiskLoc loc) const; - Record* recordAt(DiskLoc dl) const; - void grow(DiskLoc dl, int size); - - char* p() const { return (char *) _mb; } - DataFileHeader* header() { return (DataFileHeader*) _mb; } - - MongoMMF mmf; - void *_mb; // the memory mapped view - int fileNo; - }; - class DataFileMgr { friend class BasicCursor; public: @@ -192,7 +149,7 @@ namespace mongo { mongo::mutex _precalcedMutex; private: - vector<MongoDataFile *> files; + vector<DataFile *> files; SortPhaseOne* _precalced; }; @@ -273,14 +230,6 @@ namespace mongo { DiskLoc getNext(const DiskLoc& myLoc); DiskLoc getPrev(const DiskLoc& myLoc); - DiskLoc nextInExtent(const DiskLoc& myLoc) { - _accessing(); - if ( _nextOfs == DiskLoc::NullOfs ) - return DiskLoc(); - verify( _nextOfs ); - return DiskLoc(myLoc.a(), _nextOfs); - } - struct NP { int nextOfs; int prevOfs; @@ -341,194 +290,33 @@ namespace mongo { static bool MemoryTrackingEnabled; }; - - /* extents are datafile regions where all the records within the region - belong to the same namespace. 
- - (11:12:35 AM) dm10gen: when the extent is allocated, all its empty space is stuck into one big DeletedRecord - (11:12:55 AM) dm10gen: and that is placed on the free list - */ - class Extent { - public: - enum { extentSignature = 0x41424344 }; - unsigned magic; - DiskLoc myLoc; - DiskLoc xnext, xprev; /* next/prev extent for this namespace */ - - /* which namespace this extent is for. this is just for troubleshooting really - and won't even be correct if the collection were renamed! - */ - Namespace nsDiagnostic; - - int length; /* size of the extent, including these fields */ - DiskLoc firstRecord; - DiskLoc lastRecord; - char _extentData[4]; - - static int HeaderSize() { return sizeof(Extent)-4; } - - bool validates(const DiskLoc diskLoc, BSONArrayBuilder* errors = NULL); - - BSONObj dump() { - return BSON( "loc" << myLoc.toString() << "xnext" << xnext.toString() << "xprev" << xprev.toString() - << "nsdiag" << nsDiagnostic.toString() - << "size" << length << "firstRecord" << firstRecord.toString() << "lastRecord" << lastRecord.toString()); - } - - void dump(iostream& s) { - s << " loc:" << myLoc.toString() << " xnext:" << xnext.toString() << " xprev:" << xprev.toString() << '\n'; - s << " nsdiag:" << nsDiagnostic.toString() << '\n'; - s << " size:" << length << " firstRecord:" << firstRecord.toString() << " lastRecord:" << lastRecord.toString() << '\n'; - } - - /* assumes already zeroed -- insufficient for block 'reuse' perhaps - Returns a DeletedRecord location which is the data in the extent ready for us. - Caller will need to add that to the freelist structure in namespacedetail. 
- */ - DiskLoc init(const char *nsname, int _length, int _fileNo, int _offset, bool capped); - - /* like init(), but for a reuse case */ - DiskLoc reuse(const char *nsname, bool newUseIsAsCapped); - - bool isOk() const { return magic == extentSignature; } - void assertOk() const { verify(isOk()); } - - Record* newRecord(int len); - - Record* getRecord(DiskLoc dl) { - verify( !dl.isNull() ); - verify( dl.sameFile(myLoc) ); - int x = dl.getOfs() - myLoc.getOfs(); - verify( x > 0 ); - return (Record *) (((char *) this) + x); - } - - Extent* getNextExtent() { return xnext.isNull() ? 0 : DataFileMgr::getExtent(xnext); } - Extent* getPrevExtent() { return xprev.isNull() ? 0 : DataFileMgr::getExtent(xprev); } - - static int maxSize(); - static int minSize() { return 0x1000; } - /** - * @param len lengt of record we need - * @param lastRecord size of last extent which is a factor in next extent size - */ - static int followupSize(int len, int lastExtentLen); - - /** get a suggested size for the first extent in a namespace - * @param len length of record we need to insert - */ - static int initialSize(int len); - - struct FL { - DiskLoc firstRecord; - DiskLoc lastRecord; - }; - /** often we want to update just the firstRecord and lastRecord fields. - this helper is for that -- for use with getDur().writing() method - */ - FL* fl() { return (FL*) &firstRecord; } - - /** caller must declare write intent first */ - void markEmpty(); - private: - DiskLoc _reuse(const char *nsname, bool newUseIsAsCapped); // recycle an extent and reuse it for a different ns - }; - - /* a datafile - i.e. the "dbname.<#>" files : - - ---------------------- - DataFileHeader - ---------------------- - Extent (for a particular namespace) - Record - ... - Record (some chained for unused space) - ---------------------- - more Extents... 
- ---------------------- - */ - class DataFileHeader { - public: - int version; - int versionMinor; - int fileLength; - DiskLoc unused; /* unused is the portion of the file that doesn't belong to any allocated extents. -1 = no more */ - int unusedLength; - char reserved[8192 - 4*4 - 8]; - - char data[4]; // first extent starts here - - enum { HeaderSize = 8192 }; - - bool isCurrentVersion() const { - return version == PDFILE_VERSION && ( versionMinor == PDFILE_VERSION_MINOR_22_AND_OLDER - || versionMinor == PDFILE_VERSION_MINOR_24_AND_NEWER - ); - } - - bool uninitialized() const { return version == 0; } - - void init(int fileno, int filelength, const char* filename) { - if ( uninitialized() ) { - DEV log() << "datafileheader::init initializing " << filename << " n:" << fileno << endl; - if( !(filelength > 32768 ) ) { - massert(13640, str::stream() << "DataFileHeader looks corrupt at file open filelength:" << filelength << " fileno:" << fileno, false); - } - - { - // "something" is too vague, but we checked for the right db to be locked higher up the call stack - if( !Lock::somethingWriteLocked() ) { - LockState::Dump(); - log() << "*** TEMP NOT INITIALIZING FILE " << filename << ", not in a write lock." 
<< endl; - log() << "temp bypass until more elaborate change - case that is manifesting is benign anyway" << endl; - return; -/** - log() << "ERROR can't create outside a write lock" << endl; - printStackTrace(); - ::abort(); -**/ - } - } - - getDur().createdFile(filename, filelength); - verify( HeaderSize == 8192 ); - DataFileHeader *h = getDur().writing(this); - h->fileLength = filelength; - h->version = PDFILE_VERSION; - h->versionMinor = PDFILE_VERSION_MINOR_22_AND_OLDER; // All dbs start like this - h->unused.set( fileno, HeaderSize ); - verify( (data-(char*)this) == HeaderSize ); - h->unusedLength = fileLength - HeaderSize - 16; - } - } - - bool isEmpty() const { - return uninitialized() || ( unusedLength == fileLength - HeaderSize - 16 ); - } - }; - #pragma pack() - inline Extent* MongoDataFile::_getExtent(DiskLoc loc) const { + // XXX-ERH + + inline Extent* DataFile::_getExtent(DiskLoc loc) const { loc.assertOk(); Extent *e = (Extent *) (p()+loc.getOfs()); return e; } - inline Extent* MongoDataFile::getExtent(DiskLoc loc) const { + inline Extent* DataFile::getExtent(DiskLoc loc) const { Extent *e = _getExtent(loc); e->assertOk(); memconcept::is(e, memconcept::concept::extent); return e; } -} // namespace mongo - -#include "cursor.h" + inline Extent* Extent::getNextExtent() { + return xnext.isNull() ? 0 : DataFileMgr::getExtent(xnext); + } -namespace mongo { + inline Extent* Extent::getPrevExtent() { + return xprev.isNull() ? 
0 : DataFileMgr::getExtent(xprev); + } - inline Record* MongoDataFile::recordAt(DiskLoc dl) const { + // XXX-ERH + inline Record* DataFile::recordAt(DiskLoc dl) const { int ofs = dl.getOfs(); if (ofs < DataFileHeader::HeaderSize) { badOfs(ofs); // will uassert - external call to keep out of the normal code path @@ -610,13 +398,6 @@ namespace mongo { return (const BtreeBucket<V> *) r->data(); } -} // namespace mongo - -#include "database.h" -#include "memconcept.h" - -namespace mongo { - boost::intmax_t dbSize( const char *database ); inline NamespaceIndex* nsindex(const StringData& ns) { diff --git a/src/mongo/db/record.cpp b/src/mongo/db/record.cpp index 46c36537e42..9c4a7b2ef9e 100644 --- a/src/mongo/db/record.cpp +++ b/src/mongo/db/record.cpp @@ -19,7 +19,7 @@ #include "pch.h" #include "mongo/base/init.h" #include "mongo/db/curop.h" -#include "mongo/db/databaseholder.h" +#include "mongo/db/database_holder.h" #include "mongo/db/pagefault.h" #include "mongo/db/pdfile.h" #include "mongo/db/record.h" diff --git a/src/mongo/db/storage/data_file.cpp b/src/mongo/db/storage/data_file.cpp new file mode 100644 index 00000000000..a7cdbd8e488 --- /dev/null +++ b/src/mongo/db/storage/data_file.cpp @@ -0,0 +1,223 @@ +// data_file.cpp + +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "mongo/pch.h" + +#include "mongo/db/storage/data_file.h" + +#include <boost/filesystem/operations.hpp> + +#include "mongo/db/cmdline.h" +#include "mongo/db/d_concurrency.h" +#include "mongo/db/dur.h" +#include "mongo/db/lockstate.h" +#include "mongo/db/pdfile.h" +#include "mongo/db/storage/extent.h" +#include "mongo/util/file_allocator.h" + +namespace mongo { + + + // XXX-ERH + void addNewExtentToNamespace(const char *ns, Extent *e, DiskLoc eloc, DiskLoc emptyLoc, bool capped); + + static void data_file_check(void *_mb) { + if( sizeof(char *) == 4 ) + uassert( 10084 , "can't map file memory - mongo requires 64 bit build for larger datasets", _mb != 0); + else + uassert( 10085 , "can't map file memory", _mb != 0); + } + + int DataFile::maxSize() { + if ( sizeof( int* ) == 4 ) { + return 512 * 1024 * 1024; + } + else if ( cmdLine.smallfiles ) { + return 0x7ff00000 >> 2; + } + else { + return 0x7ff00000; + } + } + + NOINLINE_DECL void DataFile::badOfs2(int ofs) const { + stringstream ss; + ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; + uasserted(13441, ss.str()); + } + + NOINLINE_DECL void DataFile::badOfs(int ofs) const { + stringstream ss; + ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; + uasserted(13440, ss.str()); + } + + int DataFile::defaultSize( const char *filename ) const { + int size; + if ( fileNo <= 4 ) + size = (64*1024*1024) << fileNo; + else + size = 0x7ff00000; + if ( cmdLine.smallfiles ) { + size = size >> 2; + } + return size; + } + + /** @return true if found and opened. if uninitialized (prealloc only) does not open. 
*/ + Status DataFile::openExisting( const char *filename ) { + verify( _mb == 0 ); + if( !boost::filesystem::exists(filename) ) + return Status( ErrorCodes::InvalidPath, "DataFile::openExisting - file does not exist" ); + + if( !mmf.open(filename,false) ) { + MONGO_DLOG(2) << "info couldn't open " << filename << " probably end of datafile list" << endl; + return Status( ErrorCodes::InternalError, "DataFile::openExisting - mmf.open failed" ); + } + _mb = mmf.getView(); verify(_mb); + unsigned long long sz = mmf.length(); + verify( sz <= 0x7fffffff ); + verify( sz % 4096 == 0 ); + if( sz < 64*1024*1024 && !cmdLine.smallfiles ) { + if( sz >= 16*1024*1024 && sz % (1024*1024) == 0 ) { + log() << "info openExisting file size " << sz << " but cmdLine.smallfiles=false: " + << filename << endl; + } + else { + log() << "openExisting size " << sz << " less then minimum file size expectation " + << filename << endl; + verify(false); + } + } + data_file_check(_mb); + return Status::OK(); + } + + void DataFile::open( const char *filename, int minSize, bool preallocateOnly ) { + long size = defaultSize( filename ); + while ( size < minSize ) { + if ( size < maxSize() / 2 ) + size *= 2; + else { + size = maxSize(); + break; + } + } + if ( size > maxSize() ) + size = maxSize(); + + verify( size >= 64*1024*1024 || cmdLine.smallfiles ); + verify( size % 4096 == 0 ); + + if ( preallocateOnly ) { + if ( cmdLine.prealloc ) { + FileAllocator::get()->requestAllocation( filename, size ); + } + return; + } + + { + verify( _mb == 0 ); + unsigned long long sz = size; + if( mmf.create(filename, sz, false) ) + _mb = mmf.getView(); + verify( sz <= 0x7fffffff ); + size = (int) sz; + } + data_file_check(_mb); + header()->init(fileNo, size, filename); + } + + void DataFile::flush( bool sync ) { + mmf.flush( sync ); + } + + Extent* DataFile::createExtent(const char *ns, int approxSize, bool newCapped, int loops) { + verify( approxSize <= Extent::maxSize() ); + { + // make sizes align with VM page 
size + int newSize = (approxSize + 0xfff) & 0xfffff000; + verify( newSize >= 0 ); + if( newSize < Extent::maxSize() ) + approxSize = newSize; + } + massert( 10357 , "shutdown in progress", ! inShutdown() ); + massert( 10358 , "bad new extent size", approxSize >= Extent::minSize() && approxSize <= Extent::maxSize() ); + massert( 10359 , "header==0 on new extent: 32 bit mmap space exceeded?", header() ); // null if file open failed + int ExtentSize = min(header()->unusedLength, approxSize); + + verify( ExtentSize >= Extent::minSize() ); // TODO: maybe return NULL + + int offset = header()->unused.getOfs(); + + DataFileHeader *h = header(); + h->unused.writing().set( fileNo, offset + ExtentSize ); + getDur().writingInt(h->unusedLength) = h->unusedLength - ExtentSize; + + DiskLoc loc; + loc.set(fileNo, offset); + Extent *e = _getExtent(loc); + DiskLoc emptyLoc = getDur().writing(e)->init(ns, ExtentSize, fileNo, offset, newCapped); + + addNewExtentToNamespace(ns, e, loc, emptyLoc, newCapped); + + DEV { + MONGO_TLOG(1) << "new extent " << ns << " size: 0x" << hex << ExtentSize << " loc: 0x" + << hex << offset << " emptyLoc:" << hex << emptyLoc.getOfs() << dec + << endl; + } + return e; + } + + // ------------------------------------------------------------------------------- + + void DataFileHeader::init(int fileno, int filelength, const char* filename) { + if ( uninitialized() ) { + DEV log() << "datafileheader::init initializing " << filename << " n:" << fileno << endl; + if( !(filelength > 32768 ) ) { + massert(13640, str::stream() << "DataFileHeader looks corrupt at file open filelength:" << filelength << " fileno:" << fileno, false); + } + + { + // "something" is too vague, but we checked for the right db to be locked higher up the call stack + if( !Lock::somethingWriteLocked() ) { + LockState::Dump(); + log() << "*** TEMP NOT INITIALIZING FILE " << filename << ", not in a write lock." 
<< endl; + log() << "temp bypass until more elaborate change - case that is manifesting is benign anyway" << endl; + return; + /** + log() << "ERROR can't create outside a write lock" << endl; + printStackTrace(); + ::abort(); + **/ + } + } + + getDur().createdFile(filename, filelength); + verify( HeaderSize == 8192 ); + DataFileHeader *h = getDur().writing(this); + h->fileLength = filelength; + h->version = PDFILE_VERSION; + h->versionMinor = PDFILE_VERSION_MINOR_22_AND_OLDER; // All dbs start like this + h->unused.set( fileno, HeaderSize ); + verify( (data-(char*)this) == HeaderSize ); + h->unusedLength = fileLength - HeaderSize - 16; + } + } + +} diff --git a/src/mongo/db/storage/data_file.h b/src/mongo/db/storage/data_file.h new file mode 100644 index 00000000000..25bcfee28b9 --- /dev/null +++ b/src/mongo/db/storage/data_file.h @@ -0,0 +1,122 @@ +// data_file.h + +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include "mongo/db/diskloc.h" +#include "mongo/db/pdfile_version.h" +#include "mongo/db/storage/durable_mapped_file.h" + +namespace mongo { + + /* a datafile - i.e. the "dbname.<#>" files : + + ---------------------- + DataFileHeader + ---------------------- + Extent (for a particular namespace) + Record + ... + Record (some chained for unused space) + ---------------------- + more Extents... 
+ ---------------------- + */ +#pragma pack(1) + class DataFileHeader { + public: + int version; + int versionMinor; + int fileLength; + DiskLoc unused; /* unused is the portion of the file that doesn't belong to any allocated extents. -1 = no more */ + int unusedLength; + char reserved[8192 - 4*4 - 8]; + + char data[4]; // first extent starts here + + enum { HeaderSize = 8192 }; + + bool isCurrentVersion() const { + return version == PDFILE_VERSION && ( versionMinor == PDFILE_VERSION_MINOR_22_AND_OLDER + || versionMinor == PDFILE_VERSION_MINOR_24_AND_NEWER + ); + } + + bool uninitialized() const { return version == 0; } + + void init(int fileno, int filelength, const char* filename); + + bool isEmpty() const { + return uninitialized() || ( unusedLength == fileLength - HeaderSize - 16 ); + } + }; +#pragma pack() + + + class DataFile { + friend class DataFileMgr; + friend class BasicCursor; + public: + DataFile(int fn) : _mb(0), fileNo(fn) { } + + /** @return true if found and opened. if uninitialized (prealloc only) does not open. */ + Status openExisting( const char *filename ); + + /** creates if DNE */ + void open(const char *filename, int requestedDataSize = 0, bool preallocateOnly = false); + + /* allocate a new extent from this datafile. 
+ @param capped - true if capped collection + @param loops is our recursion check variable - you want to pass in zero + */ + Extent* createExtent(const char *ns, int approxSize, bool capped = false, int loops = 0); + + DataFileHeader *getHeader() { return header(); } + HANDLE getFd() { return mmf.getFd(); } + unsigned long long length() const { return mmf.length(); } + + /* return max size an extent may be */ + static int maxSize(); + + /** fsync */ + void flush( bool sync ); + + /** only use fore debugging */ + Extent* debug_getExtent(DiskLoc loc) { return _getExtent( loc ); } + + Extent* getExtent(DiskLoc loc) const; + Record* recordAt(DiskLoc dl) const; + private: + void badOfs(int) const; + void badOfs2(int) const; + int defaultSize( const char *filename ) const; + + Extent* _getExtent(DiskLoc loc) const; + + void grow(DiskLoc dl, int size); + + char* p() const { return (char *) _mb; } + DataFileHeader* header() { return (DataFileHeader*) _mb; } + + DurableMappedFile mmf; + void *_mb; // the memory mapped view + int fileNo; + }; + + +} diff --git a/src/mongo/db/mongommf.cpp b/src/mongo/db/storage/durable_mapped_file.cpp index 5537f1d0ba5..5e3dd110fa8 100644 --- a/src/mongo/db/mongommf.cpp +++ b/src/mongo/db/storage/durable_mapped_file.cpp @@ -1,4 +1,4 @@ -// @file mongommf.cpp +// durable_mapped_file.cpp /** * Copyright (C) 2010 10gen Inc. @@ -22,12 +22,11 @@ #include "mongo/pch.h" -#include "mongo/db/mongommf.h" +#include "mongo/db/storage/durable_mapped_file.h" #include "mongo/db/cmdline.h" #include "mongo/db/d_concurrency.h" -#include "mongo/db/d_globals.h" #include "mongo/db/dur.h" #include "mongo/db/dur_journalformat.h" #include "mongo/db/memconcept.h" @@ -37,7 +36,7 @@ using namespace mongoutils; namespace mongo { - void MongoMMF::remapThePrivateView() { + void DurableMappedFile::remapThePrivateView() { verify( cmdLine.dur ); // todo 1.9 : it turns out we require that we always remap to the same address. 
@@ -50,22 +49,22 @@ namespace mongo { } /** register view. threadsafe */ - void PointerToMMF::add(void *view, MongoMMF *f) { + void PointerToDurableMappedFile::add(void *view, DurableMappedFile *f) { verify(view); verify(f); mutex::scoped_lock lk(_m); - _views.insert( pair<void*,MongoMMF*>(view,f) ); + _views.insert( pair<void*,DurableMappedFile*>(view,f) ); } /** de-register view. threadsafe */ - void PointerToMMF::remove(void *view) { + void PointerToDurableMappedFile::remove(void *view) { if( view ) { mutex::scoped_lock lk(_m); _views.erase(view); } } - PointerToMMF::PointerToMMF() : _m("PointerToMMF") { + PointerToDurableMappedFile::PointerToDurableMappedFile() : _m("PointerToDurableMappedFile") { #if defined(SIZE_MAX) size_t max = SIZE_MAX; #else @@ -74,15 +73,15 @@ namespace mongo { verify( max > (size_t) this ); // just checking that no one redef'd SIZE_MAX and that it is sane // this way we don't need any boundary checking in _find() - _views.insert( pair<void*,MongoMMF*>((void*)0,(MongoMMF*)0) ); - _views.insert( pair<void*,MongoMMF*>((void*)max,(MongoMMF*)0) ); + _views.insert( pair<void*,DurableMappedFile*>((void*)0,(DurableMappedFile*)0) ); + _views.insert( pair<void*,DurableMappedFile*>((void*)max,(DurableMappedFile*)0) ); } /** underscore version of find is for when you are already locked @param ofs out return our offset in the view - @return the MongoMMF to which this pointer belongs + @return the DurableMappedFile to which this pointer belongs */ - MongoMMF* PointerToMMF::find_inlock(void *p, /*out*/ size_t& ofs) { + DurableMappedFile* PointerToDurableMappedFile::find_inlock(void *p, /*out*/ size_t& ofs) { // // .................memory.......................... 
// v1 p v2 @@ -90,8 +89,8 @@ namespace mongo { // // e.g., _find(p) == v1 // - const pair<void*,MongoMMF*> x = *(--_views.upper_bound(p)); - MongoMMF *mmf = x.second; + const pair<void*,DurableMappedFile*> x = *(--_views.upper_bound(p)); + DurableMappedFile *mmf = x.second; if( mmf ) { size_t o = ((char *)p) - ((char*)x.first); if( o < mmf->length() ) { @@ -105,64 +104,23 @@ namespace mongo { /** find associated MMF object for a given pointer. threadsafe @param ofs out returns offset into the view of the pointer, if found. - @return the MongoMMF to which this pointer belongs. null if not found. + @return the DurableMappedFile to which this pointer belongs. null if not found. */ - MongoMMF* PointerToMMF::find(void *p, /*out*/ size_t& ofs) { + DurableMappedFile* PointerToDurableMappedFile::find(void *p, /*out*/ size_t& ofs) { mutex::scoped_lock lk(_m); return find_inlock(p, ofs); } - PointerToMMF privateViews; - - /* void* MongoMMF::switchToPrivateView(void *readonly_ptr) { - verify( cmdLine.dur ); - verify( testIntent ); - - void *p = readonly_ptr; - - { - size_t ofs=0; - MongoMMF *mmf = ourReadViews.find(p, ofs); - if( mmf ) { - void *res = ((char *)mmf->_view_private) + ofs; - return res; - } - } - - { - size_t ofs=0; - MongoMMF *mmf = privateViews.find(p, ofs); - if( mmf ) { - log() << "dur: perf warning p=" << p << " is already in the writable view of " << mmf->filename() << endl; - return p; - } - } - - // did you call writing() with a pointer that isn't into a datafile? - log() << "dur error switchToPrivateView " << p << endl; - return p; - }*/ - - /* switch to _view_write. normally, this is a bad idea since your changes will not - show up in _view_private if there have been changes there; thus the leading underscore - as a tad of a "warning". but useful when done with some care, such as during - initialization. 
- */ - void* MongoMMF::_switchToWritableView(void *p) { - size_t ofs; - MongoMMF *f = privateViews.find(p, ofs); - verify( f ); - return (((char *)f->_view_write)+ofs); - } + PointerToDurableMappedFile privateViews; extern string dbpath; // here so that it is precomputed... - void MongoMMF::setPath(const std::string& f) { + void DurableMappedFile::setPath(const std::string& f) { string suffix; string prefix; bool ok = str::rSplitOn(f, '.', prefix, suffix); - uassert(13520, str::stream() << "MongoMMF only supports filenames in a certain format " << f, ok); + uassert(13520, str::stream() << "DurableMappedFile only supports filenames in a certain format " << f, ok); if( suffix == "ns" ) _fileSuffixNo = dur::JEntry::DotNsSuffix; else @@ -171,21 +129,21 @@ namespace mongo { _p = RelativePath::fromFullPath(prefix); } - bool MongoMMF::open(const std::string& fname, bool sequentialHint) { + bool DurableMappedFile::open(const std::string& fname, bool sequentialHint) { LOG(3) << "mmf open " << fname << endl; setPath(fname); _view_write = mapWithOptions(fname.c_str(), sequentialHint ? SEQUENTIAL : 0); return finishOpening(); } - bool MongoMMF::create(const std::string& fname, unsigned long long& len, bool sequentialHint) { + bool DurableMappedFile::create(const std::string& fname, unsigned long long& len, bool sequentialHint) { LOG(3) << "mmf create " << fname << endl; setPath(fname); _view_write = map(fname.c_str(), len, sequentialHint ? 
SEQUENTIAL : 0); return finishOpening(); } - bool MongoMMF::finishOpening() { + bool DurableMappedFile::finishOpening() { LOG(3) << "mmf finishOpening " << (void*) _view_write << ' ' << filename() << " len:" << length() << endl; if( _view_write ) { if( cmdLine.dur ) { @@ -203,22 +161,22 @@ namespace mongo { return false; } - MongoMMF::MongoMMF() : _willNeedRemap(false) { + DurableMappedFile::DurableMappedFile() : _willNeedRemap(false) { _view_write = _view_private = 0; } - MongoMMF::~MongoMMF() { + DurableMappedFile::~DurableMappedFile() { try { close(); } - catch(...) { error() << "exception in ~MongoMMF" << endl; } + catch(...) { error() << "exception in ~DurableMappedFile" << endl; } } namespace dur { void closingFileNotification(); } - /*virtual*/ void MongoMMF::close() { + /*virtual*/ void DurableMappedFile::close() { LOG(3) << "mmf close " << filename() << endl; if( view_write() /*actually was opened*/ ) { diff --git a/src/mongo/db/mongommf.h b/src/mongo/db/storage/durable_mapped_file.h index a9f80630271..4af433c61d1 100644 --- a/src/mongo/db/mongommf.h +++ b/src/mongo/db/storage/durable_mapped_file.h @@ -1,4 +1,6 @@ -/** @file mongommf.h +// durable_mapped_file.h + +/* * * Copyright (C) 2008 10gen Inc. * @@ -22,17 +24,17 @@ namespace mongo { - /** MongoMMF adds some layers atop memory mapped files - specifically our handling of private views & such. + /** DurableMappedFile adds some layers atop memory mapped files - specifically our handling of private views & such. if you don't care about journaling/durability (temp sort files & such) use MemoryMappedFile class, not this. */ - class MongoMMF : private MemoryMappedFile { + class DurableMappedFile : private MemoryMappedFile { protected: virtual void* viewForFlushing() { return _view_write; } public: - MongoMMF(); - virtual ~MongoMMF(); + DurableMappedFile(); + virtual ~DurableMappedFile(); virtual void close(); /** @return true if opened ok. 
*/ @@ -62,14 +64,6 @@ namespace mongo { */ void* view_write() const { return _view_write; } - - /* switch to _view_write. normally, this is a bad idea since your changes will not - show up in _view_private if there have been changes there; thus the leading underscore - as a tad of a "warning". but useful when done with some care, such as during - initialization. - */ - static void* _switchToWritableView(void *private_ptr); - /** for a filename a/b/c.3 filePath() is "a/b/c" fileSuffixNo() is 3 @@ -91,7 +85,7 @@ namespace mongo { void remapThePrivateView(); - virtual bool isMongoMMF() { return true; } + virtual bool isDurableMappedFile() { return true; } private: @@ -105,16 +99,16 @@ namespace mongo { bool finishOpening(); }; - /** for durability support we want to be able to map pointers to specific MongoMMF objects. + /** for durability support we want to be able to map pointers to specific DurableMappedFile objects. */ - class PointerToMMF : boost::noncopyable { + class PointerToDurableMappedFile : boost::noncopyable { public: - PointerToMMF(); + PointerToDurableMappedFile(); /** register view. threadsafe */ - void add(void *view, MongoMMF *f); + void add(void *view, DurableMappedFile *f); /** de-register view. threadsafe @@ -124,23 +118,23 @@ namespace mongo { /** find associated MMF object for a given pointer. threadsafe @param ofs out returns offset into the view of the pointer, if found. - @return the MongoMMF to which this pointer belongs. null if not found. + @return the DurableMappedFile to which this pointer belongs. null if not found. 
*/ - MongoMMF* find(void *p, /*out*/ size_t& ofs); + DurableMappedFile* find(void *p, /*out*/ size_t& ofs); /** for doing many finds in a row with one lock operation */ mutex& _mutex() { return _m; } - MongoMMF* find_inlock(void *p, /*out*/ size_t& ofs); + DurableMappedFile* find_inlock(void *p, /*out*/ size_t& ofs); - map<void*,MongoMMF*>::iterator finditer_inlock(void *p) { return _views.upper_bound(p); } + map<void*,DurableMappedFile*>::iterator finditer_inlock(void *p) { return _views.upper_bound(p); } unsigned numberOfViews_inlock() const { return _views.size(); } private: mutex _m; - map<void*, MongoMMF*> _views; + map<void*, DurableMappedFile*> _views; }; - // allows a pointer into any private view of a MongoMMF to be resolved to the MongoMMF object - extern PointerToMMF privateViews; + // allows a pointer into any private view of a DurableMappedFile to be resolved to the DurableMappedFile object + extern PointerToDurableMappedFile privateViews; } diff --git a/src/mongo/db/storage/extent.cpp b/src/mongo/db/storage/extent.cpp new file mode 100644 index 00000000000..836af21333b --- /dev/null +++ b/src/mongo/db/storage/extent.cpp @@ -0,0 +1,188 @@ +// extent.cpp + +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "mongo/pch.h" + +#include "mongo/db/storage/extent.h" + +#include "mongo/db/dur.h" +#include "mongo/util/mongoutils/str.h" + +// XXX-ERH +#include "mongo/db/pdfile.h" + +namespace mongo { + + static void extent_getEmptyLoc(const char *ns, + const DiskLoc extentLoc, + int extentLength, + bool capped, + /*out*/DiskLoc& emptyLoc, + /*out*/int& delRecLength) { + emptyLoc = extentLoc; + emptyLoc.inc( Extent::HeaderSize() ); + delRecLength = extentLength - Extent::HeaderSize(); + if( delRecLength >= 32*1024 && str::contains(ns, '$') && !capped ) { + // probably an index. so skip forward to keep its records page aligned + int& ofs = emptyLoc.GETOFS(); + int newOfs = (ofs + 0xfff) & ~0xfff; + delRecLength -= (newOfs-ofs); + dassert( delRecLength > 0 ); + ofs = newOfs; + } + } + + + BSONObj Extent::dump() { + return BSON( "loc" << myLoc.toString() + << "xnext" << xnext.toString() + << "xprev" << xprev.toString() + << "nsdiag" << nsDiagnostic.toString() + << "size" << length + << "firstRecord" + << firstRecord.toString() + << "lastRecord" << lastRecord.toString() ); + } + + void Extent::dump(iostream& s) { + s << " loc:" << myLoc.toString() + << " xnext:" << xnext.toString() + << " xprev:" << xprev.toString() << '\n'; + s << " nsdiag:" << nsDiagnostic.toString() << '\n'; + s << " size:" << length + << " firstRecord:" << firstRecord.toString() + << " lastRecord:" << lastRecord.toString() << '\n'; + } + + void Extent::markEmpty() { + xnext.Null(); + xprev.Null(); + firstRecord.Null(); + lastRecord.Null(); + } + + DiskLoc Extent::reuse(const char *nsname, bool capped) { + return getDur().writing(this)->_reuse(nsname, capped); + } + + DiskLoc Extent::_reuse(const char *nsname, bool capped) { + LOG(3) << "_reuse extent was:" << nsDiagnostic.toString() << " now:" << nsname << endl; + if (magic != extentSignature) { + StringBuilder sb; + sb << "bad extent signature " << integerToHex(magic) + << " for namespace '" << nsDiagnostic.toString() + << "' found in 
Extent::_reuse"; + msgasserted(10360, sb.str()); + } + nsDiagnostic = nsname; + markEmpty(); + + DiskLoc emptyLoc; + int delRecLength; + extent_getEmptyLoc(nsname, myLoc, length, capped, emptyLoc, delRecLength); + + // todo: some dup code here and below in Extent::init + DeletedRecord* empty = getDur().writing(DataFileMgr::getDeletedRecord(emptyLoc)); + empty->lengthWithHeaders() = delRecLength; + empty->extentOfs() = myLoc.getOfs(); + empty->nextDeleted().Null(); + return emptyLoc; + } + + /* assumes already zeroed -- insufficient for block 'reuse' perhaps */ + DiskLoc Extent::init(const char *nsname, int _length, int _fileNo, int _offset, bool capped) { + magic = extentSignature; + myLoc.set(_fileNo, _offset); + xnext.Null(); + xprev.Null(); + nsDiagnostic = nsname; + length = _length; + firstRecord.Null(); + lastRecord.Null(); + + DiskLoc emptyLoc; + int delRecLength; + extent_getEmptyLoc(nsname, myLoc, _length, capped, emptyLoc, delRecLength); + + DeletedRecord* empty = getDur().writing(DataFileMgr::getDeletedRecord(emptyLoc)); + empty->lengthWithHeaders() = delRecLength; + empty->extentOfs() = myLoc.getOfs(); + empty->nextDeleted().Null(); + return emptyLoc; + } + + bool Extent::validates(const DiskLoc diskLoc, BSONArrayBuilder* errors) { + bool extentOk = true; + if (magic != extentSignature) { + if (errors) { + StringBuilder sb; + sb << "bad extent signature " << integerToHex(magic) + << " in extent " << diskLoc.toString(); + *errors << sb.str(); + } + extentOk = false; + } + if (myLoc != diskLoc) { + if (errors) { + StringBuilder sb; + sb << "extent " << diskLoc.toString() + << " self-pointer is " << myLoc.toString(); + *errors << sb.str(); + } + extentOk = false; + } + if (firstRecord.isNull() != lastRecord.isNull()) { + if (errors) { + StringBuilder sb; + if (firstRecord.isNull()) { + sb << "in extent " << diskLoc.toString() + << ", firstRecord is null but lastRecord is " + << lastRecord.toString(); + } + else { + sb << "in extent " << diskLoc.toString() 
+ << ", firstRecord is " << firstRecord.toString() + << " but lastRecord is null"; + } + *errors << sb.str(); + } + extentOk = false; + } + if (length < minSize()) { + if (errors) { + StringBuilder sb; + sb << "length of extent " << diskLoc.toString() + << " is " << length + << ", which is less than minimum length of " << minSize(); + *errors << sb.str(); + } + extentOk = false; + } + return extentOk; + } + + int Extent::maxSize() { + int maxExtentSize = 0x7ff00000; + if ( cmdLine.smallfiles ) { + maxExtentSize >>= 2; + } + return maxExtentSize; + } + + +} diff --git a/src/mongo/db/storage/extent.h b/src/mongo/db/storage/extent.h new file mode 100644 index 00000000000..3553726234b --- /dev/null +++ b/src/mongo/db/storage/extent.h @@ -0,0 +1,111 @@ +// extent.h + +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include "mongo/db/diskloc.h" +#include "mongo/db/storage/namespace.h" + +namespace mongo { + + /* extents are datafile regions where all the records within the region + belong to the same namespace. 
+ + (11:12:35 AM) dm10gen: when the extent is allocated, all its empty space is stuck into one big DeletedRecord + (11:12:55 AM) dm10gen: and that is placed on the free list + */ +#pragma pack(1) + class Extent { + public: + enum { extentSignature = 0x41424344 }; + unsigned magic; + DiskLoc myLoc; + DiskLoc xnext, xprev; /* next/prev extent for this namespace */ + + /* which namespace this extent is for. this is just for troubleshooting really + and won't even be correct if the collection were renamed! + */ + Namespace nsDiagnostic; + + int length; /* size of the extent, including these fields */ + DiskLoc firstRecord; + DiskLoc lastRecord; + char _extentData[4]; + + static int HeaderSize() { return sizeof(Extent)-4; } + + bool validates(const DiskLoc diskLoc, BSONArrayBuilder* errors = NULL); + + BSONObj dump(); + + void dump(iostream& s); + + /* assumes already zeroed -- insufficient for block 'reuse' perhaps + Returns a DeletedRecord location which is the data in the extent ready for us. + Caller will need to add that to the freelist structure in namespacedetail. 
+ */ + DiskLoc init(const char *nsname, int _length, int _fileNo, int _offset, bool capped); + + /* like init(), but for a reuse case */ + DiskLoc reuse(const char *nsname, bool newUseIsAsCapped); + + bool isOk() const { return magic == extentSignature; } + void assertOk() const { verify(isOk()); } + + Record* getRecord(DiskLoc dl) { + verify( !dl.isNull() ); + verify( dl.sameFile(myLoc) ); + int x = dl.getOfs() - myLoc.getOfs(); + verify( x > 0 ); + return (Record *) (((char *) this) + x); + } + + Extent* getNextExtent(); + Extent* getPrevExtent(); + + static int maxSize(); + static int minSize() { return 0x1000; } + /** + * @param len lengt of record we need + * @param lastRecord size of last extent which is a factor in next extent size + */ + static int followupSize(int len, int lastExtentLen); + + /** get a suggested size for the first extent in a namespace + * @param len length of record we need to insert + */ + static int initialSize(int len); + + struct FL { + DiskLoc firstRecord; + DiskLoc lastRecord; + }; + /** often we want to update just the firstRecord and lastRecord fields. + this helper is for that -- for use with getDur().writing() method + */ + FL* fl() { return (FL*) &firstRecord; } + + /** caller must declare write intent first */ + void markEmpty(); + private: + DiskLoc _reuse(const char *nsname, bool newUseIsAsCapped); // recycle an extent and reuse it for a different ns + }; + +#pragma pack() + +} diff --git a/src/mongo/db/storage/extent_manager.cpp b/src/mongo/db/storage/extent_manager.cpp new file mode 100644 index 00000000000..616a852674c --- /dev/null +++ b/src/mongo/db/storage/extent_manager.cpp @@ -0,0 +1,241 @@ +// extent_manager.cpp + +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "mongo/pch.h" + +#include <boost/filesystem/operations.hpp> + +#include "mongo/db/client.h" +#include "mongo/db/d_concurrency.h" +#include "mongo/db/storage/data_file.h" +#include "mongo/db/storage/extent_manager.h" + +// XXX-erh +#include "mongo/db/pdfile.h" + +namespace mongo { + + // XXX-ERH + extern bool directoryperdb; + + ExtentManager::ExtentManager( const StringData& dbname, const StringData& path ) + : _dbname( dbname.toString() ), _path( path.toString() ) { + } + + ExtentManager::~ExtentManager() { + reset(); + } + + void ExtentManager::reset() { + for ( size_t i = 0; i < _files.size(); i++ ) { + delete _files[i]; + } + _files.clear(); + } + + boost::filesystem::path ExtentManager::fileName( int n ) const { + stringstream ss; + ss << _dbname << '.' << n; + boost::filesystem::path fullName( _path ); + if ( directoryperdb ) + fullName /= _dbname; + fullName /= ss.str(); + return fullName; + } + + + Status ExtentManager::init() { + verify( _files.size() == 0 ); + + for ( int n = 0; n < DiskLoc::MaxFiles; n++ ) { + boost::filesystem::path fullName = fileName( n ); + if ( !boost::filesystem::exists( fullName ) ) + break; + + string fullNameString = fullName.string(); + + auto_ptr<DataFile> df( new DataFile(n) ); + + Status s = df->openExisting( fullNameString.c_str() ); + if ( !s.isOK() ) { + return s; + } + + if ( df->getHeader()->uninitialized() ) { + // pre-alloc only, so we're done + break; + } + + _files.push_back( df.release() ); + } + + return Status::OK(); + } + + // todo: this is called a lot. 
streamline the common case + DataFile* ExtentManager::getFile( int n, int sizeNeeded , bool preallocateOnly) { + verify(this); + Lock::assertAtLeastReadLocked( _dbname ); + + if ( n < 0 || n >= DiskLoc::MaxFiles ) { + out() << "getFile(): n=" << n << endl; + massert( 10295 , "getFile(): bad file number value (corrupt db?): run repair", false); + } + DEV { + if ( n > 100 ) { + out() << "getFile(): n=" << n << endl; + } + } + DataFile* p = 0; + if ( !preallocateOnly ) { + while ( n >= (int) _files.size() ) { + verify(this); + if( !Lock::isWriteLocked(_dbname) ) { + log() << "error: getFile() called in a read lock, yet file to return is not yet open" << endl; + log() << " getFile(" << n << ") _files.size:" <<_files.size() << ' ' << fileName(n).string() << endl; + log() << " context ns: " << cc().ns() << endl; + verify(false); + } + _files.push_back(0); + } + p = _files[n]; + } + if ( p == 0 ) { + Lock::assertWriteLocked( _dbname ); + boost::filesystem::path fullName = fileName( n ); + string fullNameString = fullName.string(); + p = new DataFile(n); + int minSize = 0; + if ( n != 0 && _files[ n - 1 ] ) + minSize = _files[ n - 1 ]->getHeader()->fileLength; + if ( sizeNeeded + DataFileHeader::HeaderSize > minSize ) + minSize = sizeNeeded + DataFileHeader::HeaderSize; + try { + p->open( fullNameString.c_str(), minSize, preallocateOnly ); + } + catch ( AssertionException& ) { + delete p; + throw; + } + if ( preallocateOnly ) + delete p; + else + _files[n] = p; + } + return preallocateOnly ? 
0 : p; + } + + DataFile* ExtentManager::addAFile( int sizeNeeded, bool preallocateNextFile ) { + Lock::assertWriteLocked( _dbname ); + int n = (int) _files.size(); + DataFile *ret = getFile( n, sizeNeeded ); + if ( preallocateNextFile ) + preallocateAFile(); + return ret; + } + + size_t ExtentManager::numFiles() const { + DEV Lock::assertAtLeastReadLocked( _dbname ); + return _files.size(); + } + + long long ExtentManager::fileSize() const { + long long size=0; + for ( int n = 0; boost::filesystem::exists( fileName(n) ); n++) + size += boost::filesystem::file_size( fileName(n) ); + return size; + } + + void ExtentManager::flushFiles( bool sync ) { + Lock::assertAtLeastReadLocked( _dbname ); + for( vector<DataFile*>::iterator i = _files.begin(); i != _files.end(); i++ ) { + DataFile *f = *i; + f->flush(sync); + } + } + + Record* ExtentManager::recordFor( const DiskLoc& loc ) { + return getFile( loc.a() )->recordAt( loc ); + } + + Extent* ExtentManager::extentFor( const DiskLoc& loc ) { + Record* record = recordFor( loc ); + DiskLoc extentLoc( loc.a(), record->extentOfs() ); + return getFile( loc.a() )->getExtent( extentLoc ); + } + + DiskLoc ExtentManager::getNextRecordInExtent( const DiskLoc& loc ) { + int nextOffset = recordFor( loc )->nextOfs(); + + if ( nextOffset == DiskLoc::NullOfs ) + return DiskLoc(); + + fassert( 16967, abs(nextOffset) >= 8 ); // defensive + return DiskLoc( loc.a(), nextOffset ); + } + + DiskLoc ExtentManager::getNextRecord( const DiskLoc& loc ) { + DiskLoc next = getNextRecordInExtent( loc ); + if ( !next.isNull() ) + return next; + + // now traverse extents + + Extent *e = extentFor(loc); + while ( 1 ) { + if ( e->xnext.isNull() ) + return DiskLoc(); // end of collection + e = e->xnext.ext(); + if ( !e->firstRecord.isNull() ) + break; + // entire extent could be empty, keep looking + } + return e->firstRecord; + } + + DiskLoc ExtentManager::getPrevRecordInExtent( const DiskLoc& loc ) { + int prevOffset = recordFor( loc )->prevOfs(); + + 
if ( prevOffset == DiskLoc::NullOfs ) + return DiskLoc(); + + fassert( 16968, abs(prevOffset) >= 8 ); // defensive + return DiskLoc( loc.a(), prevOffset ); + } + + DiskLoc ExtentManager::getPrevRecord( const DiskLoc& loc ) { + DiskLoc prev = getPrevRecordInExtent( loc ); + if ( !prev.isNull() ) + return prev; + + // now traverse extents + + Extent *e = extentFor(loc); + while ( 1 ) { + if ( e->xprev.isNull() ) + return DiskLoc(); // end of collection + e = e->xprev.ext(); + if ( !e->firstRecord.isNull() ) + break; + // entire extent could be empty, keep looking + } + return e->firstRecord; + } + + +} diff --git a/src/mongo/db/storage/extent_manager.h b/src/mongo/db/storage/extent_manager.h new file mode 100644 index 00000000000..ea247b527fe --- /dev/null +++ b/src/mongo/db/storage/extent_manager.h @@ -0,0 +1,111 @@ +// extent_manager.h + +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#pragma once + +#include <string> +#include <vector> + +#include <boost/filesystem/path.hpp> + +#include "mongo/base/status.h" +#include "mongo/base/string_data.h" +#include "mongo/db/diskloc.h" + +namespace mongo { + + class DataFile; + + /** + * ExtentManager basics + * - one per database + * - responsible for managing <db>.# files + * - NOT responsible for .ns file + * - gives out extents + * - responsible for figuring out how to get a new extent + * - can use any method it wants to do so + * - this structure is NOT stored on disk + * - this class is NOT thread safe, locking should be above (for now) + * + * implementation: + * - ExtentManager holds a list of DataFile + */ + class ExtentManager { + MONGO_DISALLOW_COPYING( ExtentManager ); + + public: + ExtentManager( const StringData& dbname, const StringData& path ); + ~ExtentManager(); + + /** + * deletes all state and puts back to original state + */ + void reset(); + + /** + * opens all current files + */ + Status init(); + + size_t numFiles() const; + long long fileSize() const; + + DataFile* getFile( int n, int sizeNeeded = 0, bool preallocateOnly = false ); + + DataFile* addAFile( int sizeNeeded, bool preallocateNextFile ); + + void preallocateAFile() { getFile( numFiles() , 0, true ); }// XXX-ERH + + void flushFiles( bool sync ); + + Record* recordFor( const DiskLoc& loc ); + Extent* extentFor( const DiskLoc& loc ); + + // get(Next|Prev)Record follows the Record linked list + // these WILL cross Extent boundaries + // * @param loc - has to be the DiskLoc for a Record + + DiskLoc getNextRecord( const DiskLoc& loc ); + + DiskLoc getPrevRecord( const DiskLoc& loc ); + + // does NOT traverse extent boundaries + + DiskLoc getNextRecordInExtent( const DiskLoc& loc ); + + DiskLoc getPrevRecordInExtent( const DiskLoc& loc ); + + + private: + + boost::filesystem::path fileName( int n ) const; + + +// ----- + + std::string _dbname; // i.e. "test" + std::string _path; // i.e. 
"/data/db" + + // must be in the dbLock when touching this (and write locked when writing to of course) + // however during Database object construction we aren't, which is ok as it isn't yet visible + // to others and we are in the dbholder lock then. + std::vector<DataFile*> _files; + + }; + +} diff --git a/src/mongo/db/storage/namespace_index.h b/src/mongo/db/storage/namespace_index.h index 0f18a35ab8f..d8e239160ae 100644 --- a/src/mongo/db/storage/namespace_index.h +++ b/src/mongo/db/storage/namespace_index.h @@ -66,7 +66,7 @@ namespace mongo { void _init(); void maybeMkdir() const; - MongoMMF _f; + DurableMappedFile _f; HashTable<Namespace,NamespaceDetails> *_ht; std::string _dir; std::string _database; diff --git a/src/mongo/db/ttl.cpp b/src/mongo/db/ttl.cpp index 6c082aaaa03..6731a07b6bb 100644 --- a/src/mongo/db/ttl.cpp +++ b/src/mongo/db/ttl.cpp @@ -26,7 +26,7 @@ #include "mongo/db/client.h" #include "mongo/db/commands/fsync.h" #include "mongo/db/commands/server_status.h" -#include "mongo/db/databaseholder.h" +#include "mongo/db/database_holder.h" #include "mongo/db/instance.h" #include "mongo/db/ops/delete.h" #include "mongo/db/repl/is_master.h" diff --git a/src/mongo/dbtests/mmaptests.cpp b/src/mongo/dbtests/mmaptests.cpp index cb69f6b6bbf..b96ac410ca6 100644 --- a/src/mongo/dbtests/mmaptests.cpp +++ b/src/mongo/dbtests/mmaptests.cpp @@ -16,10 +16,10 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "pch.h" -#include "../db/mongommf.h" -#include "../util/timer.h" -#include "dbtests.h" +#include "mongo/pch.h" +#include "mongo/db/storage/durable_mapped_file.h" +#include "mongo/util/timer.h" +#include "mongo/dbtests/dbtests.h" #include <boost/filesystem/operations.hpp> @@ -47,7 +47,7 @@ namespace MMapTests { Lock::GlobalWrite lk; { - MongoMMF f; + DurableMappedFile f; unsigned long long len = 256 * 1024 * 1024; verify( f.create(fn, len, /*sequential*/false) ); { @@ -80,7 +80,7 @@ namespace MMapTests { // we make a lot here -- if we were leaking, presumably it would fail doing this many. Timer t; for( int i = 0; i < N; i++ ) { - MongoMMF f; + DurableMappedFile f; verify( f.open(fn, i%4==1) ); { char *p = (char *) f.getView(); diff --git a/src/mongo/dbtests/namespacetests.cpp b/src/mongo/dbtests/namespacetests.cpp index 2095f0cd27d..3ac695df269 100644 --- a/src/mongo/dbtests/namespacetests.cpp +++ b/src/mongo/dbtests/namespacetests.cpp @@ -2213,7 +2213,7 @@ namespace NamespaceTests { // private: // virtual string spec() const { // // NOTE 256 added to size in _userCreateNS() - // long long big = MongoDataFile::maxSize() - DataFileHeader::HeaderSize; + // long long big = DataFile::maxSize() - DataFileHeader::HeaderSize; // stringstream ss; // ss << "{\"capped\":true,\"size\":" << big << "}"; // return ss.str(); diff --git a/src/mongo/dbtests/pdfiletests.cpp b/src/mongo/dbtests/pdfiletests.cpp index 0f078131b36..76b09fbec64 100644 --- a/src/mongo/dbtests/pdfiletests.cpp +++ b/src/mongo/dbtests/pdfiletests.cpp @@ -370,7 +370,7 @@ namespace PdfileTests { n++; if( n == 5 && sizeof(void*)==4 ) break; - MongoDataFile * f = d->addAFile( big , false ); + DataFile * f = d->addAFile( big , false ); //cout << f->length() << ' ' << n << endl; if ( f->length() == l ) break; diff --git a/src/mongo/dbtests/perftests.cpp b/src/mongo/dbtests/perftests.cpp index e1f82bfe496..2ee37be23f3 100644 --- a/src/mongo/dbtests/perftests.cpp +++ b/src/mongo/dbtests/perftests.cpp @@ 
-1213,7 +1213,7 @@ namespace PerfTests { for( int i = 0; i < 20; i++ ) { sleepmillis(21); string fn = "/tmp/t1"; - MongoMMF f; + DurableMappedFile f; unsigned long long len = 1 * 1024 * 1024; verify( f.create(fn, len, /*sequential*/rand()%2==0) ); { diff --git a/src/mongo/s/d_logic.cpp b/src/mongo/s/d_logic.cpp index be03ddf02de..44b690c0ec1 100644 --- a/src/mongo/s/d_logic.cpp +++ b/src/mongo/s/d_logic.cpp @@ -22,21 +22,21 @@ mostly around shard management and checking */ -#include "pch.h" -#include <map> -#include <string> - -#include "../db/commands.h" -#include "../db/jsobj.h" -#include "../db/dbmessage.h" +#include "mongo/pch.h" -#include "../client/connpool.h" +#include "mongo/s/d_logic.h" -#include "../util/queue.h" +#include <map> +#include <string> -#include "shard.h" -#include "d_logic.h" -#include "d_writeback.h" +#include "mongo/client/connpool.h" +#include "mongo/db/client.h" +#include "mongo/db/commands.h" +#include "mongo/db/dbmessage.h" +#include "mongo/db/jsobj.h" +#include "mongo/s/d_writeback.h" +#include "mongo/s/shard.h" +#include "mongo/util/queue.h" using namespace std; diff --git a/src/mongo/s/d_logic.h b/src/mongo/s/d_logic.h index 4afc6aa98fd..76cd1385616 100644 --- a/src/mongo/s/d_logic.h +++ b/src/mongo/s/d_logic.h @@ -24,6 +24,7 @@ #include "mongo/s/collection_metadata.h" #include "mongo/s/chunk_version.h" #include "mongo/util/concurrency/ticketholder.h" +#include "mongo/util/net/message.h" namespace mongo { diff --git a/src/mongo/s/server.cpp b/src/mongo/s/server.cpp index 58995b63055..217013f1194 100644 --- a/src/mongo/s/server.cpp +++ b/src/mongo/s/server.cpp @@ -29,6 +29,7 @@ #include "mongo/db/auth/authorization_manager_global.h" #include "mongo/db/dbwebserver.h" #include "mongo/db/initialize_server_global_state.h" +#include "mongo/db/instance.h" #include "mongo/db/lasterror.h" #include "mongo/platform/process_id.h" #include "mongo/s/balance.h" diff --git a/src/mongo/tools/bridge.cpp b/src/mongo/tools/bridge.cpp index 
df78c64347b..8357a840b45 100644 --- a/src/mongo/tools/bridge.cpp +++ b/src/mongo/tools/bridge.cpp @@ -19,6 +19,7 @@ #include <boost/thread.hpp> #include "mongo/base/initializer.h" +#include "mongo/client/dbclientinterface.h" #include "mongo/db/dbmessage.h" #include "mongo/util/net/listen.h" #include "mongo/util/net/message.h" diff --git a/src/mongo/tools/dump.cpp b/src/mongo/tools/dump.cpp index adb36428a27..418855ac6ad 100644 --- a/src/mongo/tools/dump.cpp +++ b/src/mongo/tools/dump.cpp @@ -267,7 +267,7 @@ public: } - MongoDataFile * mdf = db->getFile( eLoc.a() ); + DataFile * mdf = db->getFile( eLoc.a() ); Extent * e = mdf->debug_getExtent( eLoc ); if ( ! e->isOk() ){ diff --git a/src/mongo/util/mmap.h b/src/mongo/util/mmap.h index 40987ff30eb..3c21c41a6f5 100644 --- a/src/mongo/util/mmap.h +++ b/src/mongo/util/mmap.h @@ -97,7 +97,7 @@ namespace mongo { static long long totalMappedLength(); static void closeAllFiles( stringstream &message ); - virtual bool isMongoMMF() { return false; } + virtual bool isDurableMappedFile() { return false; } string filename() const { return _filename; } void setFilename(const std::string& fn); @@ -129,8 +129,8 @@ namespace mongo { /** look up a MMF by filename. scoped mutex locking convention. example: MMFFinderByName finder; - MongoMMF *a = finder.find("file_name_a"); - MongoMMF *b = finder.find("file_name_b"); + DurableMappedFile *a = finder.find("file_name_a"); + DurableMappedFile *b = finder.find("file_name_b"); */ class MongoFileFinder : boost::noncopyable { public: @@ -223,8 +223,6 @@ namespace mongo { void* remapPrivateView(void *oldPrivateAddr); }; - typedef MemoryMappedFile MMF; - /** p is called from within a mutex that MongoFile uses. so be careful not to deadlock. 
*/ template < class F > inline void MongoFile::forEach( F p ) { diff --git a/src/mongo/util/mmap_win.cpp b/src/mongo/util/mmap_win.cpp index 27a0911cc54..5241e7874a4 100644 --- a/src/mongo/util/mmap_win.cpp +++ b/src/mongo/util/mmap_win.cpp @@ -15,14 +15,15 @@ * limitations under the License. */ -#include "pch.h" -#include "mmap.h" -#include "text.h" -#include "../db/mongommf.h" -#include "../db/d_concurrency.h" -#include "../db/memconcept.h" -#include "mongo/util/timer.h" +#include "mongo/pch.h" + +#include "mongo/db/d_concurrency.h" +#include "mongo/db/memconcept.h" +#include "mongo/db/storage/durable_mapped_file.h" #include "mongo/util/file_allocator.h" +#include "mongo/util/mmap.h" +#include "mongo/util/text.h" +#include "mongo/util/timer.h" namespace mongo { @@ -228,10 +229,10 @@ namespace mongo { size_t chunkNext = chunkStart + MemoryMappedFile::ChunkSize; scoped_lock lk2(privateViews._mutex()); - map<void*,MongoMMF*>::iterator i = privateViews.finditer_inlock((void*) (chunkNext-1)); + map<void*,DurableMappedFile*>::iterator i = privateViews.finditer_inlock((void*) (chunkNext-1)); while( 1 ) { - const pair<void*,MongoMMF*> x = *(--i); - MongoMMF *mmf = x.second; + const pair<void*,DurableMappedFile*> x = *(--i); + DurableMappedFile *mmf = x.second; if( mmf == 0 ) break; diff --git a/src/mongo/util/touch_pages.cpp b/src/mongo/util/touch_pages.cpp index e835c93d831..d5d17900957 100644 --- a/src/mongo/util/touch_pages.cpp +++ b/src/mongo/util/touch_pages.cpp @@ -47,7 +47,7 @@ namespace mongo { uassert( 16154, "namespace does not exist", nsd ); for( DiskLoc L = nsd->firstExtent(); !L.isNull(); L = L.ext()->xnext ) { - MongoDataFile* mdf = cc().database()->getFile( L.a() ); + DataFile* mdf = cc().database()->getFile( L.a() ); massert( 16238, "can't fetch extent file structure", mdf ); touch_location tl; tl.fd = mdf->getFd(); |