/** * Copyright (C) 2008-2014 MongoDB Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication #include "mongo/platform/basic.h" #include "mongo/db/prefetch.h" #include "mongo/db/catalog/collection.h" #include "mongo/db/catalog/database.h" #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/commands/server_status_metric.h" #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/dbhelpers.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/jsobj.h" #include "mongo/db/repl/repl_settings.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/db/server_parameters.h" #include "mongo/db/stats/timer_stats.h" #include "mongo/db/storage/mmap_v1/mmap.h" #include "mongo/util/log.h" namespace mongo { using std::endl; using std::string; namespace repl { namespace { // todo / idea: the prefetcher, when it fetches _id, on an upsert, will see if the record exists. if // it does not, at write time, we can just do an insert, which will be faster. // The count (of batches) and time spent fetching pages before application // -- meaning depends on the prefetch behavior: all, _id index, none, etc.) TimerStats prefetchIndexStats; ServerStatusMetricField displayPrefetchIndexPages("repl.preload.indexes", &prefetchIndexStats); TimerStats prefetchDocStats; ServerStatusMetricField displayPrefetchDocPages("repl.preload.docs", &prefetchDocStats); // page in pages needed for all index lookups on a given object void prefetchIndexPages(OperationContext* opCtx, Collection* collection, const ReplSettings::IndexPrefetchConfig& prefetchConfig, const BSONObj& obj) { // do we want prefetchConfig to be (1) as-is, (2) for update ops only, or (3) configured per op // type? One might want PREFETCH_NONE for updates, but it's more rare that it is a bad idea for // inserts. #3 (per op), a big issue would be "too many knobs". switch (prefetchConfig) { case ReplSettings::IndexPrefetchConfig::PREFETCH_NONE: return; case ReplSettings::IndexPrefetchConfig::PREFETCH_ID_ONLY: { TimerHolder timer(&prefetchIndexStats); // on the update op case, the call to prefetchRecordPages will touch the _id index. // thus perhaps this option isn't very useful? try { IndexDescriptor* desc = collection->getIndexCatalog()->findIdIndex(opCtx); if (!desc) return; IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex(desc); invariant(iam); iam->touch(opCtx, obj).transitional_ignore(); } catch (const DBException& e) { LOG(2) << "ignoring exception in prefetchIndexPages(): " << redact(e); } break; } case ReplSettings::IndexPrefetchConfig::PREFETCH_ALL: { // indexCount includes all indexes, including ones // in the process of being built IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(opCtx, true); while (ii.more()) { TimerHolder timer(&prefetchIndexStats); // This will page in all index pages for the given object. try { IndexDescriptor* desc = ii.next(); IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex(desc); verify(iam); iam->touch(opCtx, obj).transitional_ignore(); } catch (const DBException& e) { LOG(2) << "ignoring exception in prefetchIndexPages(): " << redact(e); } } break; } default: fassertFailed(16427); } } // page in the data pages for a record associated with an object void prefetchRecordPages(OperationContext* opCtx, Database* db, const char* ns, const BSONObj& obj) { BSONElement _id; if (obj.getObjectID(_id)) { TimerHolder timer(&prefetchDocStats); BSONObjBuilder builder; builder.append(_id); BSONObj result; try { if (Helpers::findById(opCtx, db, ns, builder.done(), result)) { // do we want to use Record::touch() here? it's pretty similar. // volatile - avoid compiler optimizations for touching a mmap page volatile char _dummy_char = '\0'; // NOLINT // Touch the first word on every page in order to fault it into memory for (int i = 0; i < result.objsize(); i += getMinOSPageSizeBytes()) { _dummy_char += *(result.objdata() + i); } // hit the last page, in case we missed it above _dummy_char += *(result.objdata() + result.objsize() - 1); } } catch (const DBException& e) { LOG(2) << "ignoring exception in prefetchRecordPages(): " << redact(e); } } } } // namespace // prefetch for an oplog operation void prefetchPagesForReplicatedOp(OperationContext* opCtx, Database* db, const BSONObj& op) { invariant(db); const ReplSettings::IndexPrefetchConfig prefetchConfig = getGlobalReplicationCoordinator()->getIndexPrefetchConfig(); const char* opField; const char* opType = op.getStringField("op"); switch (*opType) { case 'i': // insert case 'd': // delete opField = "o"; break; case 'u': // update opField = "o2"; break; default: // prefetch ignores other ops return; } BSONObj obj = op.getObjectField(opField); const char* ns = op.getStringField("ns"); // This will have to change for engines other than MMAP V1, because they might not have // means for directly prefetching pages from the collection. For this purpose, acquire S // lock on the database, instead of optimizing with IS. Lock::CollectionLock collLock(opCtx->lockState(), ns, MODE_S); Collection* collection = db->getCollection(opCtx, ns); if (!collection) { return; } LOG(4) << "index prefetch for op " << *opType; // should we prefetch index pages on updates? if the update is in-place and doesn't change // indexed values, it is actually slower - a lot slower if there are a dozen indexes or // lots of multikeys. possible variations (not all mutually exclusive): // 1) current behavior: full prefetch // 2) don't do it for updates // 3) don't do multikey indexes for updates // 4) don't prefetchIndexPages on some heuristic; e.g., if it's an $inc. // 5) if not prefetching index pages (#2), we should do it if we are upsertings and it // will be an insert. to do that we could do the prefetchRecordPage first and if DNE // then we do #1. // // note that on deletes 'obj' does not have all the keys we would want to prefetch on. // a way to achieve that would be to prefetch the record first, and then afterwards do // this part. // prefetchIndexPages(opCtx, collection, prefetchConfig, obj); // do not prefetch the data for inserts; it doesn't exist yet // // we should consider doing the record prefetch for the delete op case as we hit the record // when we delete. note if done we only want to touch the first page. // // update: do record prefetch. if ((*opType == 'u') && // do not prefetch the data for capped collections because // they typically do not have an _id index for findById() to use. !collection->isCapped()) { prefetchRecordPages(opCtx, db, ns, obj); } } class ReplIndexPrefetch : public ServerParameter { public: ReplIndexPrefetch() : ServerParameter(ServerParameterSet::getGlobal(), "replIndexPrefetch") {} virtual ~ReplIndexPrefetch() {} const char* _value() { if (getGlobalReplicationCoordinator()->getReplicationMode() != ReplicationCoordinator::modeReplSet) { return "uninitialized"; } ReplSettings::IndexPrefetchConfig ip = getGlobalReplicationCoordinator()->getIndexPrefetchConfig(); switch (ip) { case ReplSettings::IndexPrefetchConfig::PREFETCH_NONE: return "none"; case ReplSettings::IndexPrefetchConfig::PREFETCH_ID_ONLY: return "_id_only"; case ReplSettings::IndexPrefetchConfig::PREFETCH_ALL: return "all"; default: return "invalid"; } } virtual void append(OperationContext* opCtx, BSONObjBuilder& b, const string& name) { b.append(name, _value()); } virtual Status set(const BSONElement& newValueElement) { if (getGlobalReplicationCoordinator()->getReplicationMode() != ReplicationCoordinator::modeReplSet) { return Status(ErrorCodes::BadValue, "replication is not enabled"); } std::string prefetch = newValueElement.valuestrsafe(); return setFromString(prefetch); } virtual Status setFromString(const string& prefetch) { log() << "changing replication index prefetch behavior to " << prefetch; ReplSettings::IndexPrefetchConfig prefetchConfig; if (prefetch == "none") prefetchConfig = ReplSettings::IndexPrefetchConfig::PREFETCH_NONE; else if (prefetch == "_id_only") prefetchConfig = ReplSettings::IndexPrefetchConfig::PREFETCH_ID_ONLY; else if (prefetch == "all") prefetchConfig = ReplSettings::IndexPrefetchConfig::PREFETCH_ALL; else { return Status(ErrorCodes::BadValue, str::stream() << "unrecognized indexPrefetch setting: " << prefetch); } getGlobalReplicationCoordinator()->setIndexPrefetchConfig(prefetchConfig); return Status::OK(); } } replIndexPrefetch; } // namespace repl } // namespace mongo