diff options
Diffstat (limited to 'src/mongo/db')
32 files changed, 101 insertions, 1474 deletions
diff --git a/src/mongo/db/catalog/collection.cpp b/src/mongo/db/catalog/collection.cpp index bc03f568704..b45aa1b0597 100644 --- a/src/mongo/db/catalog/collection.cpp +++ b/src/mongo/db/catalog/collection.cpp @@ -48,6 +48,7 @@ #include "mongo/db/repl/rs.h" #include "mongo/db/storage/extent.h" #include "mongo/db/storage/extent_manager.h" +#include "mongo/db/storage/record.h" #include "mongo/db/auth/user_document_parser.h" // XXX-ANDY @@ -164,7 +165,7 @@ namespace mongo { BSONObj Collection::docFor(const DiskLoc& loc) const { Record* rec = _recordStore->recordFor( loc ); - return BSONObj( rec->accessed()->data() ); + return BSONObj( rec->data() ); } StatusWith<DiskLoc> Collection::insertDocument( TransactionExperiment* txn, @@ -309,7 +310,7 @@ namespace mongo { OpDebug* debug ) { Record* oldRecord = _recordStore->recordFor( oldLocation ); - BSONObj objOld( oldRecord->accessed()->data() ); + BSONObj objOld( oldRecord->data() ); if ( objOld.hasElement( "_id" ) ) { BSONElement oldId = objOld["_id"]; diff --git a/src/mongo/db/catalog/database.h b/src/mongo/db/catalog/database.h index b40717c4d16..ccc984517e0 100644 --- a/src/mongo/db/catalog/database.h +++ b/src/mongo/db/catalog/database.h @@ -32,7 +32,6 @@ #include "mongo/db/namespace_string.h" #include "mongo/db/structure/catalog/namespace_index.h" -#include "mongo/db/storage/record.h" #include "mongo/db/storage_options.h" #include "mongo/util/string_map.h" @@ -145,9 +144,6 @@ namespace mongo { return ns[_name.size()] == '.'; } - const RecordStats& recordStats() const { return _recordStats; } - RecordStats& recordStats() { return _recordStats; } - int getProfilingLevel() const { return _profile; } const char* getProfilingNS() const { return _profileName.c_str(); } @@ -243,7 +239,6 @@ namespace mongo { const string _namespacesName; // "alleyinsider.system.namespaces" const string _indexesName; // "alleyinsider.system.indexes" - RecordStats _recordStats; int _profile; // 0=off. int _magic; // used for making sure the object is still loaded in memory diff --git a/src/mongo/db/client.cpp b/src/mongo/db/client.cpp index 4d135099b6f..e8e8e3951df 100644 --- a/src/mongo/db/client.cpp +++ b/src/mongo/db/client.cpp @@ -55,7 +55,6 @@ #include "mongo/db/instance.h" #include "mongo/db/json.h" #include "mongo/db/jsobj.h" -#include "mongo/db/pagefault.h" #include "mongo/db/repl/rs.h" #include "mongo/db/storage_options.h" #include "mongo/s/chunk_version.h" @@ -105,7 +104,6 @@ namespace mongo { { _hasWrittenThisOperation = false; _hasWrittenSinceCheckpoint = false; - _pageFaultRetryableSection = 0; _connectionId = p ? p->connectionId() : 0; _curOp = new CurOp( this ); #ifndef _WIN32 @@ -451,24 +449,6 @@ namespace mongo { return writers + readers; } - bool Client::allowedToThrowPageFaultException() const { - if ( _hasWrittenThisOperation ) - return false; - - if ( ! _pageFaultRetryableSection ) - return false; - - if ( _pageFaultRetryableSection->laps() >= 100 ) - return false; - - // if we've done a normal yield, it means we're in a ClientCursor or something similar - // in that case, that code should be handling yielding, not us - if ( _curOp && _curOp->numYields() > 0 ) - return false; - - return true; - } - void OpDebug::reset() { extra.reset(); diff --git a/src/mongo/db/client.h b/src/mongo/db/client.h index 2ebfce97d2b..2f82a4c77e2 100644 --- a/src/mongo/db/client.h +++ b/src/mongo/db/client.h @@ -58,7 +58,6 @@ namespace mongo { class Client; class AbstractMessagingPort; class LockCollectionForReading; - class PageFaultRetryableSection; TSP_DECLARE(Client, currentClient) @@ -109,9 +108,6 @@ namespace mongo { BSONObj getHandshake() const { return _handshake; } ConnectionId getConnectionId() const { return _connectionId; } - bool inPageFaultRetryableSection() const { return _pageFaultRetryableSection != 0; } - PageFaultRetryableSection* getPageFaultRetryableSection() const { return _pageFaultRetryableSection; } - void writeHappened() { _hasWrittenSinceCheckpoint = true; _hasWrittenThisOperation = true; } bool hasWrittenSinceCheckpoint() const { return _hasWrittenSinceCheckpoint; } void checkpointHappened() { _hasWrittenSinceCheckpoint = false; } @@ -121,14 +117,6 @@ namespace mongo { _hasWrittenSinceCheckpoint = false; } - /** - * Call this to allow PageFaultExceptions even if writes happened before this was called. - * Writes after this is called still prevent PFEs from being thrown. - */ - void clearHasWrittenThisOperation() { _hasWrittenThisOperation = false; } - - bool allowedToThrowPageFaultException() const; - LockState& lockState() { return _ls; } private: @@ -147,12 +135,9 @@ namespace mongo { bool _hasWrittenThisOperation; bool _hasWrittenSinceCheckpoint; - PageFaultRetryableSection *_pageFaultRetryableSection; LockState _ls; - friend class PageFaultRetryableSection; // TEMP - friend class NoPageFaultsAllowed; // TEMP public: /** "read lock, and set my context, all in one operation" diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp index a28d05cfa8e..362c54917a2 100644 --- a/src/mongo/db/clientcursor.cpp +++ b/src/mongo/db/clientcursor.cpp @@ -44,7 +44,6 @@ #include "mongo/db/db.h" #include "mongo/db/jsobj.h" #include "mongo/db/kill_current_op.h" -#include "mongo/db/pagefault.h" #include "mongo/db/repl/rs.h" #include "mongo/db/repl/write_concern.h" @@ -158,17 +157,11 @@ namespace mongo { #endif } - void ClientCursor::staticYield(int micros, const StringData& ns, const Record* rec) { + void ClientCursor::staticYield(int micros, const StringData& ns) { bool haveReadLock = Lock::isReadLocked(); killCurrentOp.checkForInterrupt(); { - auto_ptr<LockMongoFilesShared> lk; - if ( rec ) { - // need to lock this else rec->touch won't be safe file could disappear - lk.reset( new LockMongoFilesShared() ); - } - dbtempreleasecond unlock; if ( unlock.unlocked() ) { if ( haveReadLock ) { @@ -211,11 +204,6 @@ namespace mongo { << " top: " << c->info() << endl; } - - if ( rec ) - rec->touch(); - - lk.reset(0); // need to release this before dbtempreleasecond } } diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h index bbd1e73af9e..b184f524af7 100644 --- a/src/mongo/db/clientcursor.h +++ b/src/mongo/db/clientcursor.h @@ -85,7 +85,7 @@ namespace mongo { // Yielding. // - static void staticYield(int micros, const StringData& ns, const Record* rec); + static void staticYield(int micros, const StringData& ns); static int suggestYieldMicros(); // diff --git a/src/mongo/db/commands/find_and_modify.cpp b/src/mongo/db/commands/find_and_modify.cpp index 880d3a65f08..97a39390681 100644 --- a/src/mongo/db/commands/find_and_modify.cpp +++ b/src/mongo/db/commands/find_and_modify.cpp @@ -36,7 +36,6 @@ #include "mongo/db/commands.h" #include "mongo/db/dbhelpers.h" #include "mongo/db/instance.h" -#include "mongo/db/pagefault.h" #include "mongo/db/projection.h" #include "mongo/db/ops/delete.h" #include "mongo/db/ops/update.h" @@ -99,20 +98,10 @@ namespace mongo { Lock::DBWrite dbXLock(dbname); Client::Context ctx(ns); - PageFaultRetryableSection s; - while ( 1 ) { - try { - return runNoDirectClient( ns , - query , fields , update , - upsert , returnNew , remove , - result , errmsg ); - } - catch ( PageFaultException& e ) { - e.touch(); - } - } - - + return runNoDirectClient( ns , + query , fields , update , + upsert , returnNew , remove , + result , errmsg ); } void _appendHelper( BSONObjBuilder& result , const BSONObj& doc , bool found , const BSONObj& fields ) { diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp index f3257976c1f..843ad07f995 100644 --- a/src/mongo/db/commands/write_commands/batch_executor.cpp +++ b/src/mongo/db/commands/write_commands/batch_executor.cpp @@ -43,7 +43,6 @@ #include "mongo/db/ops/update_executor.h" #include "mongo/db/ops/update_lifecycle_impl.h" #include "mongo/db/ops/update_request.h" -#include "mongo/db/pagefault.h" #include "mongo/db/repl/is_master.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/repl_settings.h" @@ -866,20 +865,7 @@ namespace mongo { WriteOpResult result; - // NOTE: Deletes will not fault outside the lock once any data has been written - PageFaultRetryableSection pageFaultSection; - while ( true ) { - try { - multiRemove( removeItem, &result ); - break; - } - catch (PageFaultException& pfe) { - pfe.touch(); - invariant(!result.getError()); - continue; - } - fassertFailed(17429); - } + multiRemove( removeItem, &result ); // END CURRENT OP incWriteStats( removeItem, result.getStats(), result.getError(), currentOp.get() ); @@ -965,35 +951,21 @@ namespace mongo { state->request->getInsertRequest()->getDocumentsAt( state->currIndex ) : normalizedInsert.getValue(); - cc().clearHasWrittenThisOperation(); - PageFaultRetryableSection pageFaultSection; - while (true) { - try { - if (!state->lockAndCheck(result)) { - break; - } - + try { + if (state->lockAndCheck(result)) { if (!state->request->isInsertIndexRequest()) { singleInsert(insertDoc, state->getCollection(), result); } else { singleCreateIndex(insertDoc, state->getCollection(), result); } - break; } - catch (const DBException& ex) { - Status status(ex.toStatus()); - if (ErrorCodes::isInterruption(status.code())) - throw; - result->setError(toWriteError(status)); - break; - } - catch (PageFaultException& pfe) { - state->unlock(); - pfe.touch(); - continue; // Try the operation again. - } - fassertFailed(17430); + } + catch (const DBException& ex) { + Status status(ex.toStatus()); + if (ErrorCodes::isInterruption(status.code())) + throw; + result->setError(toWriteError(status)); } // Errors release the write lock, as a matter of policy. diff --git a/src/mongo/db/dbcommands.cpp b/src/mongo/db/dbcommands.cpp index 99bfaa0e0c3..ed0ff4a3015 100644 --- a/src/mongo/db/dbcommands.cpp +++ b/src/mongo/db/dbcommands.cpp @@ -69,6 +69,7 @@ #include "mongo/db/repl/oplog.h" #include "mongo/db/storage/extent_manager.h" #include "mongo/db/storage/mmap_v1/dur_transaction.h" +#include "mongo/db/storage/record.h" #include "mongo/db/structure/catalog/namespace_details.h" #include "mongo/db/write_concern.h" #include "mongo/s/d_logic.h" diff --git a/src/mongo/db/dbhelpers.cpp b/src/mongo/db/dbhelpers.cpp index 08ac0d9a6d1..8a502fe8adb 100644 --- a/src/mongo/db/dbhelpers.cpp +++ b/src/mongo/db/dbhelpers.cpp @@ -46,7 +46,6 @@ #include "mongo/db/ops/update_lifecycle_impl.h" #include "mongo/db/ops/update_request.h" #include "mongo/db/ops/update_result.h" -#include "mongo/db/pagefault.h" #include "mongo/db/query/get_runner.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/query/query_planner.h" diff --git a/src/mongo/db/diskloc.h b/src/mongo/db/diskloc.h index 6924be86a69..7bbcb545024 100644 --- a/src/mongo/db/diskloc.h +++ b/src/mongo/db/diskloc.h @@ -40,7 +40,6 @@ namespace mongo { - class Record; class DeletedRecord; class Extent; class DataFile; diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp index e53810c7538..e1ed35481e3 100644 --- a/src/mongo/db/exec/collection_scan.cpp +++ b/src/mongo/db/exec/collection_scan.cpp @@ -39,39 +39,13 @@ namespace mongo { - // Some fail points for testing. - MONGO_FP_DECLARE(collscanInMemoryFail); - MONGO_FP_DECLARE(collscanInMemorySucceed); - - // static - bool CollectionScan::diskLocInMemory(DiskLoc loc) { - if (MONGO_FAIL_POINT(collscanInMemoryFail)) { - return false; - } - - if (MONGO_FAIL_POINT(collscanInMemorySucceed)) { - return true; - } - - return _iter->recordFor(loc)->likelyInPhysicalMemory(); - } - CollectionScan::CollectionScan(const CollectionScanParams& params, WorkingSet* workingSet, const MatchExpression* filter) : _workingSet(workingSet), _filter(filter), _params(params), - _nsDropped(false) { - - // We pre-allocate a WSID and use it to pass up fetch requests. It is only - // used to pass up fetch requests and we should never use it for anything else. - _wsidForFetch = _workingSet->allocate(); - WorkingSetMember* member = _workingSet->get(_wsidForFetch); - // Kind of a lie since the obj isn't pointing to the data at loc. but the obj - // won't be used. - member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; - } + _nsDropped(false) { } PlanStage::StageState CollectionScan::work(WorkingSetID* out) { ++_commonStats.works; @@ -92,18 +66,6 @@ namespace mongo { return PlanStage::NEED_TIME; } - // See if the record we're about to access is in memory. If it's not, pass a fetch - // request up. - if (!isEOF()) { - DiskLoc curr = _iter->curr(); - if (!curr.isNull() && !diskLocInMemory(curr)) { - WorkingSetMember* member = _workingSet->get(_wsidForFetch); - member->loc = curr; - *out = _wsidForFetch; - return PlanStage::NEED_FETCH; - } - } - // What we'll return to the user. DiskLoc nextLoc; @@ -169,11 +131,6 @@ namespace mongo { if (NULL != _iter) { _iter->invalidate(dl); } - - // We might have 'dl' inside of the WSM that _wsidForFetch references. This is OK because - // the runner who handles the fetch request does so before releasing any locks (and allowing - // the DiskLoc to be deleted). We also don't use any data in the WSM referenced by - // _wsidForFetch so it's OK to leave the DiskLoc there. } void CollectionScan::prepareToYield() { diff --git a/src/mongo/db/exec/collection_scan.h b/src/mongo/db/exec/collection_scan.h index 05b38df9d52..2b00c5ce3e7 100644 --- a/src/mongo/db/exec/collection_scan.h +++ b/src/mongo/db/exec/collection_scan.h @@ -77,11 +77,6 @@ namespace mongo { // True if Database::getCollection(_ns) == NULL on our first call to work. bool _nsDropped; - // If we want to return a DiskLoc and it points at something that's not in memory, we return - // a a "please page this in" result. We allocate one WSM for this purpose at construction - // and reuse it for any future fetch requests, changing the DiskLoc as appropriate. - WorkingSetID _wsidForFetch; - // Stats CommonStats _commonStats; CollectionScanStats _specificStats; diff --git a/src/mongo/db/exec/fetch.cpp b/src/mongo/db/exec/fetch.cpp index f7ee88cebbd..8077dcacdb6 100644 --- a/src/mongo/db/exec/fetch.cpp +++ b/src/mongo/db/exec/fetch.cpp @@ -32,16 +32,11 @@ #include "mongo/db/exec/filter.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/pdfile.h" -#include "mongo/db/storage/record.h" #include "mongo/util/fail_point_service.h" #include "mongo/util/mongoutils/str.h" namespace mongo { - // Some fail points for testing. - MONGO_FP_DECLARE(fetchInMemoryFail); - MONGO_FP_DECLARE(fetchInMemorySucceed); - FetchStage::FetchStage(WorkingSet* ws, PlanStage* child, const MatchExpression* filter, @@ -49,45 +44,19 @@ namespace mongo { : _collection(collection), _ws(ws), _child(child), - _filter(filter), - _idBeingPagedIn(WorkingSet::INVALID_ID) { - - } + _filter(filter) { } FetchStage::~FetchStage() { } bool FetchStage::isEOF() { - if (WorkingSet::INVALID_ID != _idBeingPagedIn) { - // We asked our parent for a page-in but he didn't get back to us. We still need to - // return the result that _idBeingPagedIn refers to. - return false; - } - return _child->isEOF(); } - bool recordInMemory(const char* data) { - if (MONGO_FAIL_POINT(fetchInMemoryFail)) { - return false; - } - - if (MONGO_FAIL_POINT(fetchInMemorySucceed)) { - return true; - } - - return Record::likelyInPhysicalMemory(data); - } - PlanStage::StageState FetchStage::work(WorkingSetID* out) { ++_commonStats.works; if (isEOF()) { return PlanStage::IS_EOF; } - // If we asked our parent for a page-in last time work(...) was called, finish the fetch. - if (WorkingSet::INVALID_ID != _idBeingPagedIn) { - return fetchCompleted(out); - } - // If we're here, we're not waiting for a DiskLoc to be fetched. Get another to-be-fetched // result from our child. WorkingSetID id = WorkingSet::INVALID_ID; @@ -99,31 +68,19 @@ namespace mongo { // If there's an obj there, there is no fetching to perform. if (member->hasObj()) { ++_specificStats.alreadyHasObj; - return returnIfMatches(member, id, out); - } - - // We need a valid loc to fetch from and this is the only state that has one. - verify(WorkingSetMember::LOC_AND_IDX == member->state); - verify(member->hasLoc()); - - Record* record = _collection->getRecordStore()->recordFor(member->loc); - const char* data = record->dataNoThrowing(); - - if (!recordInMemory(data)) { - // member->loc points to a record that's NOT in memory. Pass a fetch request up. - verify(WorkingSet::INVALID_ID == _idBeingPagedIn); - _idBeingPagedIn = id; - *out = id; - ++_commonStats.needFetch; - return PlanStage::NEED_FETCH; } else { + // We need a valid loc to fetch from and this is the only state that has one. + verify(WorkingSetMember::LOC_AND_IDX == member->state); + verify(member->hasLoc()); + // Don't need index data anymore as we have an obj. member->keyData.clear(); - member->obj = BSONObj(data); + member->obj = _collection->docFor(member->loc); member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; - return returnIfMatches(member, id, out); } + + return returnIfMatches(member, id, out); } else if (PlanStage::FAILURE == status) { *out = id; @@ -139,11 +96,7 @@ namespace mongo { return status; } else { - if (PlanStage::NEED_FETCH == status) { - *out = id; - ++_commonStats.needFetch; - } - else if (PlanStage::NEED_TIME == status) { + if (PlanStage::NEED_TIME == status) { ++_commonStats.needTime; } return status; @@ -164,49 +117,6 @@ namespace mongo { ++_commonStats.invalidates; _child->invalidate(dl, type); - - // If we're holding on to an object that we're waiting for the runner to page in... - if (WorkingSet::INVALID_ID != _idBeingPagedIn) { - // And we haven't already invalidated it... - WorkingSetMember* member = _ws->get(_idBeingPagedIn); - if (member->hasLoc() && (member->loc == dl)) { - // Just fetch it now and kill the DiskLoc. - WorkingSetCommon::fetchAndInvalidateLoc(member, _collection); - } - } - } - - PlanStage::StageState FetchStage::fetchCompleted(WorkingSetID* out) { - WorkingSetMember* member = _ws->get(_idBeingPagedIn); - - // The DiskLoc we're waiting to page in was invalidated (forced fetch). Test for - // matching and maybe pass it up. - if (member->state == WorkingSetMember::OWNED_OBJ) { - WorkingSetID memberID = _idBeingPagedIn; - _idBeingPagedIn = WorkingSet::INVALID_ID; - return returnIfMatches(member, memberID, out); - } - - // Assume that the caller has fetched appropriately. - // TODO: Do we want to double-check the runner? Not sure how reliable likelyInMemory is - // on all platforms. - verify(member->hasLoc()); - verify(!member->hasObj()); - - // Make the (unowned) object. - Record* record = _collection->getRecordStore()->recordFor(member->loc); - const char* data = record->dataNoThrowing(); - member->obj = BSONObj(data); - - // Don't need index data anymore as we have an obj. - member->keyData.clear(); - member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; - verify(!member->obj.isOwned()); - - // Return the obj if it passes our filter. - WorkingSetID memberID = _idBeingPagedIn; - _idBeingPagedIn = WorkingSet::INVALID_ID; - return returnIfMatches(member, memberID, out); } PlanStage::StageState FetchStage::returnIfMatches(WorkingSetMember* member, diff --git a/src/mongo/db/exec/fetch.h b/src/mongo/db/exec/fetch.h index ad32a3ec9ed..668a51f961b 100644 --- a/src/mongo/db/exec/fetch.h +++ b/src/mongo/db/exec/fetch.h @@ -70,11 +70,6 @@ namespace mongo { StageState returnIfMatches(WorkingSetMember* member, WorkingSetID memberID, WorkingSetID* out); - /** - * work(...) delegates to this when we're called after requesting a fetch. - */ - StageState fetchCompleted(WorkingSetID* out); - // Collection which is used by this stage. Used to resolve record ids retrieved by child // stages. The lifetime of the collection must supersede that of the stage. const Collection* _collection; @@ -86,10 +81,6 @@ namespace mongo { // The filter is not owned by us. const MatchExpression* _filter; - // If we're fetching a DiskLoc and it points at something that's not in memory, we return a - // a "please page this in" result and hold on to the WSID until the next call to work(...). - WorkingSetID _idBeingPagedIn; - // Stats CommonStats _commonStats; FetchStats _specificStats; diff --git a/src/mongo/db/fts/fts_util.h b/src/mongo/db/fts/fts_util.h index fe9ad0de341..5a749b27f53 100644 --- a/src/mongo/db/fts/fts_util.h +++ b/src/mongo/db/fts/fts_util.h @@ -34,7 +34,6 @@ #include "mongo/db/hasher.h" #include "mongo/db/jsobj.h" -#include "mongo/db/storage/record.h" #include "mongo/util/unordered_fast_key_table.h" namespace mongo { @@ -59,35 +58,6 @@ namespace mongo { (*s)[i] = (char)tolower( (int)(*s)[i] ); } - /* - * ScoredLocation stores the total score for a document (record *) wrt a search - * - */ - struct ScoredLocation { - ScoredLocation( Record* r, double sc ) - : rec(r), score(sc) { - } - - Record* rec; - double score; - - bool operator<( const ScoredLocation& other ) const { - if ( other.score < score ) - return true; - if ( other.score > score ) - return false; - return rec < other.rec; - } - }; - - // scored location comparison is done based on score - class ScoredLocationComp { - public: - bool operator() (const ScoredLocation& lhs, const ScoredLocation& rhs) const { - return (lhs.score > rhs.score); - } - }; - struct _be_hash { size_t operator()( const BSONElement& e ) const { return static_cast<size_t>( BSONElementHasher::hash64( e, 17 ) ); diff --git a/src/mongo/db/instance.cpp b/src/mongo/db/instance.cpp index 71a3fe9dbc2..9ae661da723 100644 --- a/src/mongo/db/instance.cpp +++ b/src/mongo/db/instance.cpp @@ -73,7 +73,6 @@ #include "mongo/db/ops/update_driver.h" #include "mongo/db/ops/update_executor.h" #include "mongo/db/ops/update_request.h" -#include "mongo/db/pagefault.h" #include "mongo/db/query/new_find.h" #include "mongo/db/repl/is_master.h" #include "mongo/db/repl/oplog.h" @@ -650,34 +649,24 @@ namespace mongo { op.debug().query = pattern; op.setQuery(pattern); - PageFaultRetryableSection s; - while ( 1 ) { - try { - DeleteRequest request(ns); - request.setQuery(pattern); - request.setMulti(!justOne); - request.setUpdateOpLog(true); - DeleteExecutor executor(&request); - uassertStatusOK(executor.prepare()); - Lock::DBWrite lk(ns.ns()); - - // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit - if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) ) - return; - - Client::Context ctx(ns); - DurTransaction txn; + DeleteRequest request(ns); + request.setQuery(pattern); + request.setMulti(!justOne); + request.setUpdateOpLog(true); + DeleteExecutor executor(&request); + uassertStatusOK(executor.prepare()); + Lock::DBWrite lk(ns.ns()); - long long n = executor.execute(&txn); - lastError.getSafe()->recordDelete( n ); - op.debug().ndeleted = n; - break; - } - catch ( PageFaultException& e ) { - LOG(2) << "recordDelete got a PageFaultException" << endl; - e.touch(); - } - } + // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit + if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) ) + return; + + Client::Context ctx(ns); + DurTransaction txn; + + long long n = executor.execute(&txn); + lastError.getSafe()->recordDelete( n ); + op.debug().ndeleted = n; } QueryResult* emptyMoreResult(long long); @@ -904,34 +893,25 @@ namespace mongo { uassertStatusOK(status); } - PageFaultRetryableSection s; - while ( true ) { - try { - Lock::DBWrite lk(ns); - - // CONCURRENCY TODO: is being read locked in big log sufficient here? - // writelock is used to synchronize stepdowns w/ writes - uassert( 10058 , "not master", isMasterNs(ns) ); - - if ( handlePossibleShardedMessage( m , 0 ) ) - return; - - Client::Context ctx(ns); - DurTransaction txn; - - if (multi.size() > 1) { - const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError; - insertMulti(&txn, ctx, keepGoing, ns, multi, op); - } else { - checkAndInsert(&txn, ctx, ns, multi[0]); - globalOpCounters.incInsertInWriteLock(1); - op.debug().ninserted = 1; - } - return; - } - catch ( PageFaultException& e ) { - e.touch(); - } + Lock::DBWrite lk(ns); + + // CONCURRENCY TODO: is being read locked in big log sufficient here? + // writelock is used to synchronize stepdowns w/ writes + uassert( 10058 , "not master", isMasterNs(ns) ); + + if ( handlePossibleShardedMessage( m , 0 ) ) + return; + + Client::Context ctx(ns); + DurTransaction txn; + + if (multi.size() > 1) { + const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError; + insertMulti(&txn, ctx, keepGoing, ns, multi, op); + } else { + checkAndInsert(&txn, ctx, ns, multi[0]); + globalOpCounters.incInsertInWriteLock(1); + op.debug().ninserted = 1; } } diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp index 919ff8bbe24..bdccddb670b 100644 --- a/src/mongo/db/mongod_options.cpp +++ b/src/mongo/db/mongod_options.cpp @@ -919,13 +919,11 @@ namespace mongo { } if (params.count("repair")) { - Record::MemoryTrackingEnabled = false; mongodGlobalParams.upgrade = 1; // --repair implies --upgrade mongodGlobalParams.repair = 1; storageGlobalParams.dur = false; } if (params.count("upgrade")) { - Record::MemoryTrackingEnabled = false; mongodGlobalParams.upgrade = 1; } if (params.count("notablescan")) { diff --git a/src/mongo/db/ops/update.cpp b/src/mongo/db/ops/update.cpp index 63bfe547d32..04b7e26a073 100644 --- a/src/mongo/db/ops/update.cpp +++ b/src/mongo/db/ops/update.cpp @@ -42,7 +42,6 @@ #include "mongo/db/ops/update_driver.h" #include "mongo/db/ops/update_executor.h" #include "mongo/db/ops/update_lifecycle.h" -#include "mongo/db/pagefault.h" #include "mongo/db/pdfile.h" #include "mongo/db/query/get_runner.h" #include "mongo/db/query/lite_parsed_query.h" @@ -52,7 +51,6 @@ #include "mongo/db/repl/is_master.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/storage/mmap_v1/dur_transaction.h" -#include "mongo/db/storage/record.h" #include "mongo/db/catalog/collection.h" #include "mongo/platform/unordered_set.h" diff --git a/src/mongo/db/pagefault.cpp b/src/mongo/db/pagefault.cpp deleted file mode 100644 index 0117195deb7..00000000000 --- a/src/mongo/db/pagefault.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// @file pagefault.cpp - -/** -* Copyright (C) 2012 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - - -#include "mongo/pch.h" - -#include "mongo/db/pagefault.h" - -#include "mongo/db/client.h" -#include "mongo/db/diskloc.h" -#include "mongo/db/pdfile.h" -#include "mongo/db/storage/record.h" -#include "mongo/server.h" -#include "mongo/util/mmap.h" - -namespace mongo { - - PageFaultException::PageFaultException(const Record *_r) - { - verify( cc().allowedToThrowPageFaultException() ); - cc().getPageFaultRetryableSection()->didLap(); - r = _r; - era = LockMongoFilesShared::getEra(); - LOG(2) << "PageFaultException thrown" << endl; - } - - void PageFaultException::touch() { - if ( Lock::isLocked() ) { - warning() << "PageFaultException::touch happening with a lock" << endl; - } - LockMongoFilesShared lk; - if( LockMongoFilesShared::getEra() != era ) { - // files opened and closed. we don't try to handle but just bail out; this is much simpler - // and less error prone and saves us from taking a dbmutex readlock. - MONGO_DLOG(2) << "era changed" << endl; - return; - } - r->touch(); - } - - PageFaultRetryableSection::~PageFaultRetryableSection() { - cc()._pageFaultRetryableSection = 0; - } - PageFaultRetryableSection::PageFaultRetryableSection() { - _laps = 0; - verify( cc()._pageFaultRetryableSection == 0 ); - if( Lock::isLocked() ) { - cc()._pageFaultRetryableSection = 0; - if( debug || logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(3)) ) { - LOGSOME << "info PageFaultRetryableSection will not yield, already locked upon reaching" << endl; - } - } - else { - cc()._pageFaultRetryableSection = this; - } - } - - - NoPageFaultsAllowed::NoPageFaultsAllowed() { - _saved = cc()._pageFaultRetryableSection; - cc()._pageFaultRetryableSection = 0; - } - - NoPageFaultsAllowed::~NoPageFaultsAllowed() { - cc()._pageFaultRetryableSection = _saved; - } -} diff --git a/src/mongo/db/pagefault.h b/src/mongo/db/pagefault.h deleted file mode 100644 index d625d593512..00000000000 --- a/src/mongo/db/pagefault.h +++ /dev/null @@ -1,88 +0,0 @@ -// @file pagefault.h - -/** -* Copyright (C) 2012 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - - -#pragma once - -namespace mongo { - - class Record; - - class PageFaultException /*: public DBException*/ { - unsigned era; - const Record *r; - public: - PageFaultException(const PageFaultException& rhs) : era(rhs.era), r(rhs.r) { } - explicit PageFaultException(const Record*); - void touch(); - }; - - class PageFaultRetryableSection : boost::noncopyable { - unsigned _laps; - public: - unsigned laps() const { return _laps; } - void didLap() { _laps++; } - PageFaultRetryableSection(); - ~PageFaultRetryableSection(); - }; - - /** - * this turns off page faults in a scope - * there are just certain arease where its dangerous - * this might mean the code is dangerous anyway.... - */ - class NoPageFaultsAllowed : boost::noncopyable { - public: - NoPageFaultsAllowed(); - ~NoPageFaultsAllowed(); - private: - PageFaultRetryableSection* _saved; - }; - -#if 0 - inline void how_to_use_example() { - // ... - { - PageFaultRetryableSection s; - while( 1 ) { - try { - writelock lk; // or readlock - // do work - break; - } - catch( PageFaultException& e ) { - e.touch(); - } - } - } - // ... - } -#endif -} diff --git a/src/mongo/db/query/idhack_runner.cpp b/src/mongo/db/query/idhack_runner.cpp index fcd7b0fce8b..091d60fb4ae 100644 --- a/src/mongo/db/query/idhack_runner.cpp +++ b/src/mongo/db/query/idhack_runner.cpp @@ -39,7 +39,6 @@ #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/type_explain.h" #include "mongo/db/query/plan_executor.h" -#include "mongo/db/storage/record.h" #include "mongo/s/d_logic.h" namespace { @@ -132,30 +131,7 @@ namespace mongo { else { invariant(!hasIDProjection(_query.get())); - // Fetch object from storage. - Record* record = _collection->getRecordStore()->recordFor( loc ); - _nscannedObjects++; - - // If the record isn't in memory... - if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) { - // And we're allowed to yield ourselves... - if (Runner::YIELD_AUTO == _policy) { - // Note what we're yielding to fetch so that we don't crash if the loc is - // deleted during a yield. - _locFetching = loc; - // Yield. TODO: Do we want to bother yielding if micros < 0? - int micros = ClientCursor::suggestYieldMicros(); - ClientCursor::staticYield(micros, "", record); - // This can happen when we're yielded for various reasons (e.g. db/idx dropped). - if (_killed) { - _done = true; - return Runner::RUNNER_DEAD; - } - } - } - - // Either the data was in memory or we paged it in. *objOut = _collection->docFor(loc); // If we're sharded make sure the key belongs to us. We need the object to do this. diff --git a/src/mongo/db/query/multi_plan_runner.cpp b/src/mongo/db/query/multi_plan_runner.cpp index a262dfbb62b..9fe4f28e04a 100644 --- a/src/mongo/db/query/multi_plan_runner.cpp +++ b/src/mongo/db/query/multi_plan_runner.cpp @@ -508,30 +508,7 @@ namespace mongo { // lock between receiving the NEED_FETCH and actually fetching(?). verify(member->hasLoc()); - // Actually bring record into memory. - Record* record = _collection->getRecordStore()->recordFor(member->loc); - - // If we're allowed to, go to disk outside of the lock. - if (NULL != _yieldPolicy.get()) { - saveState(); - _yieldPolicy->yield(record); - if (_failure || _killed) { return false; } - restoreState(); - } - else { - // We're set to manually yield. We go to disk in the lock. - record->touch(); - } - - // Record should be in memory now. Log if it's not. - if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) { - OCCASIONALLY { - warning() << "Record wasn't in memory immediately after fetch: " - << member->loc.toString() << endl; - } - } - - // Note that we're not freeing id. Fetch semantics say that we shouldn't. + // Do nothing. TODO: Remove NEED_FETCH entirely from stages. } else if (PlanStage::IS_EOF == state) { // First plan to hit EOF wins automatically. Stop evaluating other plans. diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp index 8f112e77ba3..48f1c3f5bf7 100644 --- a/src/mongo/db/query/plan_executor.cpp +++ b/src/mongo/db/query/plan_executor.cpp @@ -32,7 +32,6 @@ #include "mongo/db/exec/plan_stats.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/exec/working_set_common.h" -#include "mongo/db/storage/record.h" namespace mongo { @@ -146,30 +145,7 @@ namespace mongo { // lock between receiving the NEED_FETCH and actually fetching(?). verify(member->hasLoc()); - // Actually bring record into memory. - Record* record = _collection->getRecordStore()->recordFor(member->loc); - - // If we're allowed to, go to disk outside of the lock. - if (NULL != _yieldPolicy.get()) { - saveState(); - _yieldPolicy->yield(record); - if (_killed) { return Runner::RUNNER_DEAD; } - restoreState(); - } - else { - // We're set to manually yield. We go to disk in the lock. - record->touch(); - } - - // Record should be in memory now. Log if it's not. - if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) { - OCCASIONALLY { - warning() << "Record wasn't in memory immediately after fetch: " - << member->loc.toString() << endl; - } - } - - // Note that we're not freeing id. Fetch semantics say that we shouldn't. + // XXX: remove NEED_FETCH } else if (PlanStage::IS_EOF == code) { return Runner::RUNNER_EOF; diff --git a/src/mongo/db/query/runner_yield_policy.h b/src/mongo/db/query/runner_yield_policy.h index f1db033d093..ee46e412953 100644 --- a/src/mongo/db/query/runner_yield_policy.h +++ b/src/mongo/db/query/runner_yield_policy.h @@ -58,7 +58,7 @@ namespace mongo { * * Provided runner MUST be YIELD_MANUAL. */ - bool yieldAndCheckIfOK(Runner* runner, Record* record = NULL) { + bool yieldAndCheckIfOK(Runner* runner) { invariant(runner); invariant(runner->collection()); @@ -66,8 +66,6 @@ namespace mongo { // If micros is not positive, no point in yielding, nobody waiting. // - // TODO: Do we want to yield anyway if record is not NULL? - // // TODO: Track how many times we actually yield, how many times micros is <0, etc. if (micros <= 0) { return true; } @@ -78,7 +76,7 @@ namespace mongo { runner->collection()->cursorCache()->registerRunner( _runnerYielding ); // Note that this call checks for interrupt, and thus can throw if interrupt flag is set - staticYield(micros, record); + staticYield(micros); if ( runner->collection() ) { // if the runner was killed, runner->collection() will return NULL @@ -91,18 +89,15 @@ namespace mongo { } /** - * Yield, possibly fetching the provided record. Caller is in charge of all runner - * registration. + * Yield. Caller is in charge of all runner registration. * * Used for YIELD_AUTO runners. */ - void yield(Record* rec = NULL) { + void yield() { int micros = ClientCursor::suggestYieldMicros(); - // If there is anyone waiting on us or if there's a record to page-in, yield. TODO: Do - // we want to page in the record in the lock even if nobody is waiting for the lock? - if (micros > 0 || (NULL != rec)) { - staticYield(micros, rec); + if (micros > 0) { + staticYield(micros); // TODO: When do we really want to reset this? Currently we reset it when we // actually yield. As such we'll keep on trying to yield once the tracker has // elapsed. If we reset it even if we don't yield, we'll wait until the time @@ -111,8 +106,8 @@ namespace mongo { } } - static void staticYield(int micros, const Record* rec = NULL) { - ClientCursor::staticYield(micros, "", rec); + static void staticYield(int micros) { + ClientCursor::staticYield(micros, ""); } private: diff --git a/src/mongo/db/storage/extent.h b/src/mongo/db/storage/extent.h index e70c27cd394..81a50a57466 100644 --- a/src/mongo/db/storage/extent.h +++ b/src/mongo/db/storage/extent.h @@ -31,6 +31,7 @@ #pragma once #include "mongo/db/diskloc.h" +#include "mongo/db/storage/record.h" #include "mongo/db/structure/catalog/namespace.h" namespace mongo { diff --git a/src/mongo/db/storage/mmap_v1/dur.h b/src/mongo/db/storage/mmap_v1/dur.h index 6e0a2b03253..f1c760549b5 100644 --- a/src/mongo/db/storage/mmap_v1/dur.h +++ b/src/mongo/db/storage/mmap_v1/dur.h @@ -32,6 +32,7 @@ #include "mongo/db/diskloc.h" #include "mongo/db/storage/durable_mapped_file.h" +#include "mongo/db/storage/record.h" namespace mongo { diff --git a/src/mongo/db/storage/record.cpp b/src/mongo/db/storage/record.cpp deleted file mode 100644 index e3fe67f25a9..00000000000 --- a/src/mongo/db/storage/record.cpp +++ /dev/null @@ -1,606 +0,0 @@ -// record.cpp - -/** -* Copyright (C) 2012 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - -#include "mongo/pch.h" - -#include "mongo/db/storage/record.h" - -#include "mongo/base/init.h" -#include "mongo/db/commands/server_status.h" -#include "mongo/db/curop.h" -#include "mongo/db/catalog/database_holder.h" -#include "mongo/db/pagefault.h" -#include "mongo/db/pdfile.h" -#include "mongo/db/storage/extent_manager.h" -#include "mongo/platform/bits.h" -#include "mongo/platform/unordered_set.h" -#include "mongo/util/net/listen.h" -#include "mongo/util/processinfo.h" -#include "mongo/util/stack_introspect.h" - -namespace mongo { - - RecordStats recordStats; - - void RecordStats::record( BSONObjBuilder& b ) { - b.appendNumber( "accessesNotInMemory" , accessesNotInMemory.load() ); - b.appendNumber( "pageFaultExceptionsThrown" , pageFaultExceptionsThrown.load() ); - - } - - void Record::appendStats( BSONObjBuilder& b ) { - recordStats.record( b ); - } - - namespace ps { - - enum State { - In , Out, Unk - }; - - enum Constants { - SliceSize = 1024 , - MaxChain = 20 , // intentionally very low - NumSlices = 10 , - RotateTimeSecs = 90 , - BigHashSize = 128 - }; - - int hash( size_t region ) { - return - abs( ( ( 7 + (int)(region & 0xFFFF) ) - * ( 11 + (int)( ( region >> 16 ) & 0xFFFF ) ) -#if defined(_WIN64) || defined(__amd64__) - * ( 13 + (int)( ( region >> 32 ) & 0xFFFF ) ) - * ( 17 + (int)( ( region >> 48 ) & 0xFFFF ) ) -#endif - ) % SliceSize ); - } - - - /** - * simple hash map for region -> status - * this constitutes a single region of time - * it does chaining, but very short chains - */ - class Slice { - - struct Entry { - size_t region; - unsigned long long value; - }; - - public: - - Slice() { - reset(); - } - - void reset() { - memset( _data , 0 , SliceSize * sizeof(Entry) ); - _lastReset = time(0); - } - - State get( int regionHash , size_t region , short offset ) { - DEV verify( hash( region ) == regionHash ); - - Entry * e = _get( regionHash , region , false ); - if ( ! e ) - return Unk; - - return ( e->value & ( 1ULL << offset ) ) ? In : Out; - } - - /** - * @return true if added, false if full - */ - bool in( int regionHash , size_t region , short offset ) { - DEV verify( hash( region ) == regionHash ); - - Entry * e = _get( regionHash , region , true ); - if ( ! e ) - return false; - - e->value |= 1ULL << offset; - return true; - } - - - void addPages( unordered_set<size_t>* pages ) { - for ( int i = 0; i < SliceSize; i++ ) { - unsigned long long v = _data[i].value; - - while ( v ) { - int offset = firstBitSet( v ) - 1; - - size_t page = ( _data[i].region << 6 | offset ); - pages->insert( page ); - - v &= ~( 1ULL << offset ); - } - } - } - - time_t lastReset() const { return _lastReset; } - private: - - Entry* _get( int start , size_t region , bool add ) { - for ( int i=0; i<MaxChain; i++ ) { - - int bucket = ( start + i ) % SliceSize; - - if ( _data[bucket].region == 0 ) { - if ( ! add ) - return 0; - - _data[bucket].region = region; - return &_data[bucket]; - } - - if ( _data[bucket].region == region ) { - return &_data[bucket]; - } - } - return 0; - } - - Entry _data[SliceSize]; - time_t _lastReset; - }; - - - /** - * this contains many slices of times - * the idea you put mem status in the current time slice - * and then after a certain period of time, it rolls off so we check again - */ - class Rolling { - - public: - Rolling() - : _lock( "ps::Rolling" ){ - _curSlice = 0; - _lastRotate = Listener::getElapsedTimeMillis(); - } - - - /** - * after this call, we assume the page is in ram - * @param doHalf if this is a known good access, want to put in first half - * @return whether we know the page is in ram - */ - bool access( size_t region , short offset , bool doHalf ) { - int regionHash = hash(region); - - SimpleMutex::scoped_lock lk( _lock ); - - static int rarely_count = 0; - if ( rarely_count++ % ( 2048 / BigHashSize ) == 0 ) { - long long now = Listener::getElapsedTimeMillis(); - RARELY if ( now == 0 ) { - MONGO_TLOG(0) << "warning Listener::getElapsedTimeMillis returning 0ms" << endl; - } - - if ( now - _lastRotate > ( 1000 * RotateTimeSecs ) ) { - _rotate(); - } - } - - for ( int i=0; i<NumSlices / ( doHalf ? 2 : 1 ); i++ ) { - int pos = (_curSlice+i)%NumSlices; - State s = _slices[pos].get( regionHash , region , offset ); - - if ( s == In ) - return true; - - if ( s == Out ) { - _slices[pos].in( regionHash , region , offset ); - return false; - } - } - - // we weren't in any slice - // so add to cur - if ( ! _slices[_curSlice].in( regionHash , region , offset ) ) { - _rotate(); - _slices[_curSlice].in( regionHash , region , offset ); - } - return false; - } - - /** - * @param pages OUT adds each page to the set - * @param mySlices temporary space for copy - * @return the oldest timestamp we have - */ - time_t addPages( unordered_set<size_t>* pages, Slice* mySlices ) { - time_t oldestTimestamp = std::numeric_limits<time_t>::max(); - { - // by doing this, we're in the lock only about half as long as the naive way - // that's measure with a small data set - // Assumption is that with a large data set, actually adding to set may get more costly - // so this way the time in lock should be totally constant - SimpleMutex::scoped_lock lk( _lock ); - memcpy( mySlices, _slices, NumSlices * sizeof(Slice) ); - - for ( int i = 0; i < NumSlices; i++ ) { - oldestTimestamp = std::min( oldestTimestamp, _slices[i].lastReset() ); - } - } - - for ( int i = 0; i < NumSlices; i++ ) { - mySlices[i].addPages( pages ); - } - - return oldestTimestamp; - } - private: - - void _rotate() { - _curSlice = ( _curSlice + 1 ) % NumSlices; - _slices[_curSlice].reset(); - _lastRotate = Listener::getElapsedTimeMillis(); - } - - int _curSlice; - long long _lastRotate; - Slice _slices[NumSlices]; - - SimpleMutex _lock; - }; - - Rolling* rolling = new Rolling[BigHashSize]; - - int bigHash( size_t region ) { - return hash( region ) % BigHashSize; - } - - namespace PointerTable { - - /* A "superpage" is a group of 16 contiguous pages that differ - * only in the low-order 16 bits. This means that there is - * enough room in the low-order bits to store a bitmap for each - * page in the superpage. - */ - static const size_t superpageMask = ~0xffffLL; - static const size_t superpageShift = 16; - static const size_t pageSelectorMask = 0xf000LL; // selects a page in a superpage - static const int pageSelectorShift = 12; - - // Tunables - static const int capacity = 128; // in superpages - static const int bucketSize = 4; // half cache line - static const int buckets = capacity/bucketSize; - - struct Data { - /** organized similar to a CPU cache - * bucketSize-way set associative - * least-recently-inserted replacement policy - */ - size_t _table[buckets][bucketSize]; - long long _lastReset; // time in millis - }; - - void reset(Data* data) { - memset(data->_table, 0, sizeof(data->_table)); - data->_lastReset = Listener::getElapsedTimeMillis(); - } - - inline void resetIfNeeded( Data* data ) { - const long long now = Listener::getElapsedTimeMillis(); - if (MONGO_unlikely(now - data->_lastReset > RotateTimeSecs*1000)) - reset(data); - } - - inline size_t pageBitOf(size_t ptr) { - return 1LL << ((ptr & pageSelectorMask) >> pageSelectorShift); - } - - inline size_t superpageOf(size_t ptr) { - return ptr & superpageMask; - } - - inline size_t bucketFor(size_t ptr) { - return (ptr >> superpageShift) % buckets; - } - - inline bool haveSeenPage(size_t superpage, size_t ptr) { - return superpage & pageBitOf(ptr); - } - - inline void markPageSeen(size_t& superpage, size_t ptr) { - superpage |= pageBitOf(ptr); - } - - /** call this to check a page has been seen yet. */ - inline bool seen(Data* data, size_t ptr) { - resetIfNeeded(data); - - // A bucket contains 4 superpages each containing 16 contiguous pages - // See below for a more detailed explanation of superpages - size_t* bucket = data->_table[bucketFor(ptr)]; - - for (int i = 0; i < bucketSize; i++) { - if (superpageOf(ptr) == superpageOf(bucket[i])) { - if (haveSeenPage(bucket[i], ptr)) - return true; - - markPageSeen(bucket[i], ptr); - return false; - } - } - - // superpage isn't in thread-local cache - // slide bucket forward and add new superpage at front - for (int i = bucketSize-1; i > 0; i--) - bucket[i] = bucket[i-1]; - - bucket[0] = superpageOf(ptr); - markPageSeen(bucket[0], ptr); - - return false; - } - - Data* getData(); - - }; - - void appendWorkingSetInfo( BSONObjBuilder& b ) { - boost::scoped_array<Slice> mySlices( new Slice[NumSlices] ); - - unordered_set<size_t> totalPages; - Timer t; - - time_t timestamp = 0; - - for ( int i = 0; i < BigHashSize; i++ ) { - time_t myOldestTimestamp = rolling[i].addPages( &totalPages, mySlices.get() ); - timestamp = std::max( timestamp, myOldestTimestamp ); - } - - b.append( "note", "thisIsAnEstimate" ); - b.appendNumber( "pagesInMemory", totalPages.size() ); - b.appendNumber( "computationTimeMicros", static_cast<long long>(t.micros()) ); - b.append( "overSeconds", static_cast<int>( time(0) - timestamp ) ); - - } - - } - - - // These need to be outside the ps namespace due to the way they are defined -#if defined(MONGO_HAVE___THREAD) - __thread ps::PointerTable::Data _pointerTableData; - ps::PointerTable::Data* ps::PointerTable::getData() { - return &_pointerTableData; - } -#elif defined(MONGO_HAVE___DECLSPEC_THREAD) - __declspec( thread ) ps::PointerTable::Data _pointerTableData; - ps::PointerTable::Data* ps::PointerTable::getData() { - return &_pointerTableData; - } -#else - TSP_DEFINE(ps::PointerTable::Data, _pointerTableData); - ps::PointerTable::Data* ps::PointerTable::getData() { - return _pointerTableData.getMake(); - } -#endif - - bool Record::MemoryTrackingEnabled = true; - - volatile int __record_touch_dummy = 1; // this is used to make sure the compiler doesn't get too smart on us - void Record::touch( bool entireRecrd ) const { - if ( _lengthWithHeaders > HeaderSize ) { // this also makes sure lengthWithHeaders is in memory - const char * addr = _data; - const int length = _netLength(); - for ( int i = 0 ; i <= length ; i += 2048 ) { - __record_touch_dummy += addr[i]; - - break; // TODO: remove this, pending SERVER-3711 - - // note if this is a touch of a deletedrecord, we don't want to touch more than the first part. we may simply - // be updated the linked list and a deletedrecord could be gigantic. similar circumstance just less extreme - // exists for any record if we are just updating its header, say on a remove(); some sort of hints might be - // useful. - - if ( ! entireRecrd ) - break; - } - } - } - - static bool blockSupported = false; - - MONGO_INITIALIZER_WITH_PREREQUISITES(RecordBlockSupported, - ("SystemInfo"))(InitializerContext* cx) { - blockSupported = ProcessInfo::blockCheckSupported(); - return Status::OK(); - } - - void Record::appendWorkingSetInfo( BSONObjBuilder& b ) { - if ( ! blockSupported ) { - b.append( "info", "not supported" ); - return; - } - - ps::appendWorkingSetInfo( b ); - } - - bool Record::likelyInPhysicalMemory() const { - return likelyInPhysicalMemory( _data ); - } - - bool Record::likelyInPhysicalMemory( const char* data ) { - DEV { - // we don't want to do this too often as it makes DEBUG builds very slow - // at some point we might want to pass in what type of Record this is and - // then we can use that to make a more intelligent decision - int mod; - if ( Lock::isReadLocked() ) { - // we'll check read locks less often - // since its a lower probability of error - mod = 1000; - } - else if ( Lock::isLocked() ) { - // write lock's can more obviously cause issues - // check more often than reads - mod = 100; - } - else { - // no lock??? - // if we get here we should be very paranoid - mod = 50; - } - - if ( rand() % mod == 0 ) - return false; - } // end DEV test code - - if ( ! MemoryTrackingEnabled ) - return true; - - const size_t page = (size_t)data >> 12; - const size_t region = page >> 6; - const size_t offset = page & 0x3f; - - const bool seen = ps::PointerTable::seen( ps::PointerTable::getData(), reinterpret_cast<size_t>(data)); - if (seen || ps::rolling[ps::bigHash(region)].access( region , offset , false ) ) { - -#ifdef _DEBUG - if ( blockSupported && ! ProcessInfo::blockInMemory(data) ) { - RARELY warning() << "we think data is in ram but system says no" << endl; - } -#endif - return true; - } - - if ( ! blockSupported ) { - // this means we don't fallback to system call - // and assume things aren't in memory - // possible we yield too much - but better than not yielding through a fault - return false; - } - - return ProcessInfo::blockInMemory( const_cast<char*>(data) ); - } - - - Record* Record::accessed() { - const bool seen = ps::PointerTable::seen( ps::PointerTable::getData(), reinterpret_cast<size_t>(_data)); - if (!seen){ - const size_t page = (size_t)_data >> 12; - const size_t region = page >> 6; - const size_t offset = page & 0x3f; - ps::rolling[ps::bigHash(region)].access( region , offset , true ); - } - - return this; - } - - void Record::_accessing() const { - if ( likelyInPhysicalMemory() ) - return; - - const Client& client = cc(); - Database* db = client.getContext()->db(); - - recordStats.accessesNotInMemory.fetchAndAdd(1); - if ( db ) - db->recordStats().accessesNotInMemory.fetchAndAdd(1); - - if ( ! client.allowedToThrowPageFaultException() ) - return; - - if ( client.curop() && client.curop()->elapsedMillis() > 50 ) { - // this means we've been going too long to restart - // we should track how often this happens - return; - } - - // note you can infer how many throws weren't allowed by subtracting: - // accessesNotInMemory - pageFaultExceptionsThrown - recordStats.pageFaultExceptionsThrown.fetchAndAdd(1); - if ( db ) - db->recordStats().pageFaultExceptionsThrown.fetchAndAdd(1); - - DEV fassert( 16236 , ! inConstructorChain(true) ); - throw PageFaultException(this); - } - - void DeletedRecord::_accessing() const { - - } - - namespace { - - class WorkingSetSSS : public ServerStatusSection { - public: - WorkingSetSSS() : ServerStatusSection( "workingSet" ){} - virtual bool includeByDefault() const { return false; } - - BSONObj generateSection(const BSONElement& configElement) const { - BSONObjBuilder b; - Record::appendWorkingSetInfo( b ); - return b.obj(); - } - - } asserts; - - class RecordStats : public ServerStatusSection { - public: - RecordStats() : ServerStatusSection( "recordStats" ){} - virtual bool includeByDefault() const { return true; } - - BSONObj generateSection(const BSONElement& configElement) const { - BSONObjBuilder record; - - Record::appendStats( record ); - - set<string> dbs; - { - Lock::DBRead read( "local" ); - dbHolder().getAllShortNames( dbs ); - } - - for ( set<string>::iterator i = dbs.begin(); i != dbs.end(); ++i ) { - string db = *i; - Client::ReadContext ctx( db ); - BSONObjBuilder temp( record.subobjStart( db ) ); - ctx.ctx().db()->recordStats().record( temp ); - temp.done(); - } - - return record.obj(); - } - - } recordStats; - - } -} diff --git a/src/mongo/db/storage/record.h b/src/mongo/db/storage/record.h index db6a703b0ae..e50e3f1bd32 100644 --- a/src/mongo/db/storage/record.h +++ b/src/mongo/db/storage/record.h @@ -52,25 +52,26 @@ namespace mongo { public: enum HeaderSizeValue { HeaderSize = 16 }; - int lengthWithHeaders() const { _accessing(); return _lengthWithHeaders; } - int& lengthWithHeaders() { _accessing(); return _lengthWithHeaders; } + int lengthWithHeaders() const { return _lengthWithHeaders; } + int& lengthWithHeaders() { return _lengthWithHeaders; } - int extentOfs() const { _accessing(); return _extentOfs; } - int& extentOfs() { _accessing(); return _extentOfs; } + int extentOfs() const { return _extentOfs; } + int& extentOfs() { return _extentOfs; } - int nextOfs() const { _accessing(); return _nextOfs; } - int& nextOfs() { _accessing(); return _nextOfs; } + int nextOfs() const { return _nextOfs; } + int& nextOfs() { return _nextOfs; } - int prevOfs() const { _accessing(); return _prevOfs; } - int& prevOfs() { _accessing(); return _prevOfs; } + int prevOfs() const { return _prevOfs; } + int& prevOfs() { return _prevOfs; } - const char * data() const { _accessing(); return _data; } - char * data() { _accessing(); return _data; } + const char* data() const { return _data; } + char* data() { return _data; } - const char * dataNoThrowing() const { return _data; } - char * dataNoThrowing() { return _data; } + // XXX remove + const char* dataNoThrowing() const { return _data; } + char* dataNoThrowing() { return _data; } - int netLength() const { _accessing(); return _netLength(); } + int netLength() const { return _netLength(); } /* use this when a record is deleted. basically a union with next/prev fields */ DeletedRecord& asDeleted() { return *((DeletedRecord*) this); } @@ -81,50 +82,13 @@ namespace mongo { int nextOfs; int prevOfs; }; + NP* np() { return (NP*) &_nextOfs; } - // --------------------- - // memory cache - // --------------------- - - /** - * touches the data so that is in physical memory - * @param entireRecrd if false, only the header and first byte is touched - * if true, the entire record is touched - * */ - void touch( bool entireRecrd = false ) const; - - /** - * @return if this record is likely in physical memory - * its not guaranteed because its possible it gets swapped out in a very unlucky windows - */ - bool likelyInPhysicalMemory() const ; - - /** - * tell the cache this Record was accessed - * @return this, for simple chaining - */ - Record* accessed(); - - static bool likelyInPhysicalMemory( const char* data ); - - /** - * this adds stats about page fault exceptions currently - * specically how many times we call _accessing where the record is not in memory - * and how many times we throw a PageFaultException - */ - static void appendStats( BSONObjBuilder& b ); - - static void appendWorkingSetInfo( BSONObjBuilder& b ); private: int _netLength() const { return _lengthWithHeaders - HeaderSize; } - /** - * call this when accessing a field which could hit disk - */ - void _accessing() const; - int _lengthWithHeaders; int _extentOfs; int _nextOfs; @@ -143,31 +107,23 @@ namespace mongo { class DeletedRecord { public: - int lengthWithHeaders() const { _accessing(); return _lengthWithHeaders; } - int& lengthWithHeaders() { _accessing(); return _lengthWithHeaders; } + int lengthWithHeaders() const { return _lengthWithHeaders; } + int& lengthWithHeaders() { return _lengthWithHeaders; } - int extentOfs() const { _accessing(); return _extentOfs; } - int& extentOfs() { _accessing(); return _extentOfs; } + int extentOfs() const { return _extentOfs; } + int& extentOfs() { return _extentOfs; } // TODO: we need to not const_cast here but problem is DiskLoc::writing - DiskLoc& nextDeleted() const { _accessing(); return const_cast<DiskLoc&>(_nextDeleted); } + DiskLoc& nextDeleted() const { return const_cast<DiskLoc&>(_nextDeleted); } private: - - void _accessing() const; - int _lengthWithHeaders; + int _extentOfs; + DiskLoc _nextDeleted; }; BOOST_STATIC_ASSERT( 16 == sizeof(DeletedRecord) ); - struct RecordStats { - void record( BSONObjBuilder& b ); - - AtomicInt64 accessesNotInMemory; - AtomicInt64 pageFaultExceptionsThrown; - }; - -} +} // namespace mongo diff --git a/src/mongo/db/structure/btree/btree_stats.cpp b/src/mongo/db/structure/btree/btree_stats.cpp deleted file mode 100644 index ad8ea10e173..00000000000 --- a/src/mongo/db/structure/btree/btree_stats.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// btree_stats.cpp - -/** -* Copyright (C) 2008 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - -#include "mongo/pch.h" - -#include "mongo/base/init.h" -#include "mongo/db/structure/btree/btree_stats.h" - -namespace mongo { - - - IndexCounters::IndexCounters() - : ServerStatusSection( "indexCounters" ) { - - _memSupported = ProcessInfo().blockCheckSupported(); - - _btreeMemHits = 0; - _btreeMemMisses = 0; - _btreeAccesses = 0; - - - _maxAllowed = ( numeric_limits< long long >::max() ) / 2; - _resets = 0; - } - - IndexCounters::~IndexCounters(){ - } - - BSONObj IndexCounters::generateSection(const BSONElement& configElement) const { - if ( ! _memSupported ) { - return BSON( "note" << "not supported on this platform" ); - } - - BSONObjBuilder bb; - bb.appendNumber( "accesses" , _btreeAccesses ); - bb.appendNumber( "hits" , _btreeMemHits ); - bb.appendNumber( "misses" , _btreeMemMisses ); - - bb.append( "resets" , _resets ); - - bb.append( "missRatio" , (_btreeAccesses ? (_btreeMemMisses / (double)_btreeAccesses) : 0) ); - - return bb.obj(); - } - - void IndexCounters::_roll() { - _btreeAccesses = 0; - _btreeMemMisses = 0; - _btreeMemHits = 0; - _resets++; - } - - IndexCounters* globalIndexCounters = NULL; - - MONGO_INITIALIZER_WITH_PREREQUISITES(BtreeIndexCountersBlockSupported, - ("SystemInfo"))(InitializerContext* cx) { - if (globalIndexCounters == NULL) { - globalIndexCounters = new IndexCounters(); - } - return Status::OK(); - } - -} diff --git a/src/mongo/db/structure/btree/btree_stats.h b/src/mongo/db/structure/btree/btree_stats.h deleted file mode 100644 index 249c29ed76c..00000000000 --- a/src/mongo/db/structure/btree/btree_stats.h +++ /dev/null @@ -1,86 +0,0 @@ -// btree_stats.h - -/** -* Copyright (C) 2008-2012 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - - -#pragma once - -#include "mongo/db/commands/server_status.h" -#include "mongo/db/pdfile.h" -#include "mongo/db/storage/record.h" -#include "mongo/util/processinfo.h" - -namespace mongo { - - - class IndexCounters : public ServerStatusSection { - public: - IndexCounters(); - virtual ~IndexCounters(); - - virtual bool includeByDefault() const { return true; } - - virtual BSONObj generateSection(const BSONElement& configElement) const; - - - // used without a mutex intentionally (can race) - void btree( const char* node ) { - if ( ! _memSupported ) - return; - btree( Record::likelyInPhysicalMemory( node ) ); - } - - void btree( bool memHit ) { - if ( memHit ) - _btreeMemHits++; - else - _btreeMemMisses++; - if ( _btreeAccesses++ > _maxAllowed ) - _roll(); - - } - void btreeHit() { _btreeMemHits++; _btreeAccesses++; } - void btreeMiss() { _btreeMemMisses++; _btreeAccesses++; } - - private: - - void _roll(); - - bool _memSupported; - - int _resets; - long long _maxAllowed; - - long long _btreeMemMisses; - long long _btreeMemHits; - long long _btreeAccesses; - }; - - extern IndexCounters* globalIndexCounters; -} diff --git a/src/mongo/db/structure/collection_compact.cpp b/src/mongo/db/structure/collection_compact.cpp index e0225ee2154..793cfd5d096 100644 --- a/src/mongo/db/structure/collection_compact.cpp +++ b/src/mongo/db/structure/collection_compact.cpp @@ -43,6 +43,7 @@ #include "mongo/db/structure/catalog/namespace_details.h" #include "mongo/db/storage/extent.h" #include "mongo/db/storage/extent_manager.h" +#include "mongo/db/storage/record.h" #include "mongo/db/storage/transaction.h" #include "mongo/util/touch_pages.h" |