// record_store_v1_base.cpp

/**
 *    Copyright (C) 2013-2014 MongoDB Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU Affero General Public License, version 3,
 *    as published by the Free Software Foundation.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the GNU Affero General Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage

#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"

#include <boost/scoped_ptr.hpp>
#include <boost/static_assert.hpp>

#include "mongo/db/catalog/collection.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/storage/mmap_v1/extent.h"
#include "mongo/db/storage/mmap_v1/extent_manager.h"
#include "mongo/db/storage/mmap_v1/record.h"
#include "mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h"
#include "mongo/util/log.h"
#include "mongo/util/progress_meter.h"
#include "mongo/util/timer.h"
#include "mongo/util/touch_pages.h"

namespace mongo {

using boost::scoped_ptr;
using std::set;
using std::string;

/* Deleted list buckets are used to quickly locate free space based on size.  Each bucket
   contains records up to that size (meaning a record with a size exactly equal to
   bucketSizes[n] would go into bucket n+1).
*/
const int RecordStoreV1Base::bucketSizes[] = {
    0x20,     0x40,     0x80,     0x100,     // 32,   64,   128,  256
    0x200,    0x400,    0x800,    0x1000,    // 512,  1K,   2K,   4K
    0x2000,   0x4000,   0x8000,   0x10000,   // 8K,   16K,  32K,  64K
    0x20000,  0x40000,  0x80000,  0x100000,  // 128K, 256K, 512K, 1M
    0x200000, 0x400000, 0x600000, 0x800000,  // 2M,   4M,   6M,   8M
    0xA00000, 0xC00000, 0xE00000,            // 10M,  12M,  14M
    MaxAllowedAllocation,                    // 16.5M
    MaxAllowedAllocation + 1,                // Only MaxAllowedAllocation sized records go here.
    INT_MAX,                                 // "oversized" bucket for unused parts of extents.
};

// If this fails, it means that bucketSizes doesn't have the correct number of entries.
BOOST_STATIC_ASSERT(sizeof(RecordStoreV1Base::bucketSizes) /
                        sizeof(RecordStoreV1Base::bucketSizes[0]) ==
                    RecordStoreV1Base::Buckets);
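// Illustrative examples of how the buckets above are used (see quantizeAllocationSpace()
// and bucket() at the bottom of this file):
//   - quantizeAllocationSpace(300) returns 0x200 (512), the first bucket size >= 300 bytes.
//   - bucket(300) returns 4, the index of the first bucket size strictly greater than 300.
//   - A record whose on-disk size is exactly 0x100 (256) quantizes to 256 but is filed under
//     bucket index 4 (the 512-byte bucket), matching the "bucket n+1" rule described above.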
SavedCursorRegistry::~SavedCursorRegistry() {
    for (SavedCursorSet::iterator it = _cursors.begin(); it != _cursors.end(); it++) {
        (*it)->_registry = NULL;  // prevent SavedCursor destructor from accessing this
    }
}

void SavedCursorRegistry::registerCursor(SavedCursor* cursor) {
    invariant(!cursor->_registry);
    cursor->_registry = this;
    scoped_spinlock lock(_mutex);
    _cursors.insert(cursor);
}

bool SavedCursorRegistry::unregisterCursor(SavedCursor* cursor) {
    if (!cursor->_registry) {
        return false;
    }
    invariant(cursor->_registry == this);
    cursor->_registry = NULL;
    scoped_spinlock lock(_mutex);
    invariant(_cursors.erase(cursor));
    return true;
}

void SavedCursorRegistry::invalidateCursorsForBucket(DiskLoc bucket) {
    // While this is not strictly necessary as an exclusive collection lock will be held,
    // it's cleaner to just make the SavedCursorRegistry thread-safe. Spinlock is OK here.
    scoped_spinlock lock(_mutex);
    for (SavedCursorSet::iterator it = _cursors.begin(); it != _cursors.end();) {
        if ((*it)->bucket == bucket) {
            (*it)->_registry = NULL;  // prevent ~SavedCursor from trying to unregister
            _cursors.erase(it++);
        } else {
            it++;
        }
    }
}

RecordStoreV1Base::RecordStoreV1Base(const StringData& ns,
                                     RecordStoreV1MetaData* details,
                                     ExtentManager* em,
                                     bool isSystemIndexes)
    : RecordStore(ns),
      _details(details),
      _extentManager(em),
      _isSystemIndexes(isSystemIndexes) {}

RecordStoreV1Base::~RecordStoreV1Base() {}

int64_t RecordStoreV1Base::storageSize(OperationContext* txn,
                                       BSONObjBuilder* extraInfo,
                                       int level) const {
    BSONArrayBuilder extentInfo;

    int64_t total = 0;
    int n = 0;

    DiskLoc cur = _details->firstExtent(txn);

    while (!cur.isNull()) {
        Extent* e = _extentManager->getExtent(cur);

        total += e->length;
        n++;

        if (extraInfo && level > 0) {
            extentInfo.append(BSON("len" << e->length << "loc: " << e->myLoc.toBSONObj()));
        }
        cur = e->xnext;
    }

    if (extraInfo) {
        extraInfo->append("numExtents", n);
        if (level > 0)
            extraInfo->append("extents", extentInfo.arr());
    }

    return total;
}

RecordData RecordStoreV1Base::dataFor(OperationContext* txn, const RecordId& loc) const {
    return recordFor(DiskLoc::fromRecordId(loc))->toRecordData();
}
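// Note: in this MMAPv1 code a RecordId is just a DiskLoc in its RecordId form: a data file
// number (DiskLoc::a()) plus a byte offset within that file (DiskLoc::getOfs()).  The methods
// below convert with DiskLoc::fromRecordId()/DiskLoc::toRecordId() and let the ExtentManager
// translate the DiskLoc into a pointer inside the memory-mapped data file.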
bool RecordStoreV1Base::findRecord(OperationContext* txn,
                                   const RecordId& loc,
                                   RecordData* rd) const {
    // This is a bit odd: the semantics of the storage engine API imply the record _has_ to
    // exist, and we can't actually check that here, so we assume the best.
    Record* rec = recordFor(DiskLoc::fromRecordId(loc));
    if (!rec) {
        return false;
    }
    *rd = rec->toRecordData();
    return true;
}

Record* RecordStoreV1Base::recordFor(const DiskLoc& loc) const {
    return _extentManager->recordForV1(loc);
}

const DeletedRecord* RecordStoreV1Base::deletedRecordFor(const DiskLoc& loc) const {
    invariant(loc.a() != -1);
    return reinterpret_cast<const DeletedRecord*>(recordFor(loc));
}

DeletedRecord* RecordStoreV1Base::drec(const DiskLoc& loc) const {
    invariant(loc.a() != -1);
    return reinterpret_cast<DeletedRecord*>(recordFor(loc));
}

Extent* RecordStoreV1Base::_getExtent(OperationContext* txn, const DiskLoc& loc) const {
    return _extentManager->getExtent(loc);
}

DiskLoc RecordStoreV1Base::_getExtentLocForRecord(OperationContext* txn, const DiskLoc& loc) const {
    return _extentManager->extentLocForV1(loc);
}

DiskLoc RecordStoreV1Base::getNextRecord(OperationContext* txn, const DiskLoc& loc) const {
    DiskLoc next = getNextRecordInExtent(txn, loc);
    if (!next.isNull()) {
        return next;
    }

    // now traverse extents
    Extent* e = _getExtent(txn, _getExtentLocForRecord(txn, loc));
    while (1) {
        if (e->xnext.isNull())
            return DiskLoc();  // end of collection
        e = _getExtent(txn, e->xnext);
        if (!e->firstRecord.isNull())
            break;
        // entire extent could be empty, keep looking
    }
    return e->firstRecord;
}

DiskLoc RecordStoreV1Base::getPrevRecord(OperationContext* txn, const DiskLoc& loc) const {
    DiskLoc prev = getPrevRecordInExtent(txn, loc);
    if (!prev.isNull()) {
        return prev;
    }

    // now traverse extents
    Extent* e = _getExtent(txn, _getExtentLocForRecord(txn, loc));
    while (1) {
        if (e->xprev.isNull())
            return DiskLoc();  // end of collection
        e = _getExtent(txn, e->xprev);
        if (!e->firstRecord.isNull())
            break;
        // entire extent could be empty, keep looking
    }
    return e->lastRecord;
}
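// _findFirstSpot() below turns the body of a freshly allocated extent into a single
// DeletedRecord.  For what looks like an index extent (namespace contains '$', not capped,
// at least 32KB of usable space) it first rounds the starting offset up to the next 4KB page
// boundary with (ofs + 0xfff) & ~0xfff; e.g. an offset of 0x1234 becomes 0x2000, and the
// 0xdcc skipped bytes are simply subtracted from the deleted record's length.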
DiskLoc RecordStoreV1Base::_findFirstSpot(OperationContext* txn,
                                          const DiskLoc& extDiskLoc,
                                          Extent* e) {
    DiskLoc emptyLoc = extDiskLoc;
    emptyLoc.inc(Extent::HeaderSize());
    int delRecLength = e->length - Extent::HeaderSize();

    if (delRecLength >= 32 * 1024 && _ns.find('$') != string::npos && !isCapped()) {
        // probably an index, so skip forward to keep its records page aligned
        int& ofs = emptyLoc.GETOFS();
        int newOfs = (ofs + 0xfff) & ~0xfff;
        delRecLength -= (newOfs - ofs);
        dassert(delRecLength > 0);
        ofs = newOfs;
    }

    DeletedRecord* empty = txn->recoveryUnit()->writing(drec(emptyLoc));
    empty->lengthWithHeaders() = delRecLength;
    empty->extentOfs() = e->myLoc.getOfs();
    empty->nextDeleted().Null();
    return emptyLoc;
}

DiskLoc RecordStoreV1Base::getNextRecordInExtent(OperationContext* txn, const DiskLoc& loc) const {
    int nextOffset = recordFor(loc)->nextOfs();

    if (nextOffset == DiskLoc::NullOfs)
        return DiskLoc();

    fassert(17441, abs(nextOffset) >= 8);  // defensive
    DiskLoc result(loc.a(), nextOffset);
    return result;
}

DiskLoc RecordStoreV1Base::getPrevRecordInExtent(OperationContext* txn, const DiskLoc& loc) const {
    int prevOffset = recordFor(loc)->prevOfs();

    if (prevOffset == DiskLoc::NullOfs)
        return DiskLoc();

    fassert(17442, abs(prevOffset) >= 8);  // defensive
    DiskLoc result(loc.a(), prevOffset);
    return result;
}

RecordFetcher* RecordStoreV1Base::recordNeedsFetch(OperationContext* txn,
                                                   const RecordId& loc) const {
    return _extentManager->recordNeedsFetch(DiskLoc::fromRecordId(loc));
}

StatusWith<RecordId> RecordStoreV1Base::insertRecord(OperationContext* txn,
                                                     const DocWriter* doc,
                                                     bool enforceQuota) {
    int docSize = doc->documentSize();
    if (docSize < 4) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
    }
    const int lenWHdr = docSize + Record::HeaderSize;
    if (lenWHdr > MaxAllowedAllocation) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
    }
    const int lenToAlloc =
        (doc->addPadding() && shouldPadInserts()) ? quantizeAllocationSpace(lenWHdr) : lenWHdr;

    StatusWith<DiskLoc> loc = allocRecord(txn, lenToAlloc, enforceQuota);
    if (!loc.isOK())
        return StatusWith<RecordId>(loc.getStatus());

    Record* r = recordFor(loc.getValue());
    fassert(17319, r->lengthWithHeaders() >= lenWHdr);

    r = reinterpret_cast<Record*>(txn->recoveryUnit()->writingPtr(r, lenWHdr));
    doc->writeDocument(r->data());

    _addRecordToRecListInExtent(txn, r, loc.getValue());

    _details->incrementStats(txn, r->netLength(), 1);

    return StatusWith<RecordId>(loc.getValue().toRecordId());
}

StatusWith<RecordId> RecordStoreV1Base::insertRecord(OperationContext* txn,
                                                     const char* data,
                                                     int len,
                                                     bool enforceQuota) {
    if (len < 4) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
    }

    if (len + Record::HeaderSize > MaxAllowedAllocation) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
    }

    return _insertRecord(txn, data, len, enforceQuota);
}
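// Illustration of the sizing logic used by the insert paths above and _insertRecord() below:
// a 1000-byte document needs lenWHdr = 1000 + Record::HeaderSize (16 bytes in MMAPv1) = 1016
// bytes on disk.  When padding is enabled, quantizeAllocationSpace(1016) rounds that up to the
// next bucket size, 0x400 (1024), which reduces fragmentation of the deleted-record free lists
// and leaves a little room for the document to grow without moving.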
StatusWith<RecordId> RecordStoreV1Base::_insertRecord(OperationContext* txn,
                                                      const char* data,
                                                      int len,
                                                      bool enforceQuota) {
    const int lenWHdr = len + Record::HeaderSize;
    const int lenToAlloc = shouldPadInserts() ? quantizeAllocationSpace(lenWHdr) : lenWHdr;
    fassert(17208, lenToAlloc >= lenWHdr);

    StatusWith<DiskLoc> loc = allocRecord(txn, lenToAlloc, enforceQuota);
    if (!loc.isOK())
        return StatusWith<RecordId>(loc.getStatus());

    Record* r = recordFor(loc.getValue());
    fassert(17210, r->lengthWithHeaders() >= lenWHdr);

    // copy the data
    r = reinterpret_cast<Record*>(txn->recoveryUnit()->writingPtr(r, lenWHdr));
    memcpy(r->data(), data, len);

    _addRecordToRecListInExtent(txn, r, loc.getValue());

    _details->incrementStats(txn, r->netLength(), 1);

    return StatusWith<RecordId>(loc.getValue().toRecordId());
}

StatusWith<RecordId> RecordStoreV1Base::updateRecord(OperationContext* txn,
                                                     const RecordId& oldLocation,
                                                     const char* data,
                                                     int dataSize,
                                                     bool enforceQuota,
                                                     UpdateNotifier* notifier) {
    Record* oldRecord = recordFor(DiskLoc::fromRecordId(oldLocation));
    if (oldRecord->netLength() >= dataSize) {
        // Make sure to notify other queries before we do an in-place update.
        if (notifier) {
            Status callbackStatus = notifier->recordStoreGoingToUpdateInPlace(txn, oldLocation);
            if (!callbackStatus.isOK())
                return StatusWith<RecordId>(callbackStatus);
        }

        // we fit
        memcpy(txn->recoveryUnit()->writingPtr(oldRecord->data(), dataSize), data, dataSize);
        return StatusWith<RecordId>(oldLocation);
    }

    if (isCapped())
        return StatusWith<RecordId>(ErrorCodes::InternalError,
                                    "failing update: objects in a capped ns cannot grow",
                                    10003);

    // we have to move
    if (dataSize + Record::HeaderSize > MaxAllowedAllocation) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
    }

    StatusWith<RecordId> newLocation = _insertRecord(txn, data, dataSize, enforceQuota);
    if (!newLocation.isOK())
        return newLocation;

    // insert worked, so we delete old record
    if (notifier) {
        Status moveStatus = notifier->recordStoreGoingToMove(
            txn, oldLocation, oldRecord->data(), oldRecord->netLength());
        if (!moveStatus.isOK())
            return StatusWith<RecordId>(moveStatus);
    }

    deleteRecord(txn, oldLocation);

    return newLocation;
}

bool RecordStoreV1Base::updateWithDamagesSupported() const {
    return true;
}
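// updateWithDamages() below applies a set of "damage" descriptors to a record in place.
// Each entry in the mutablebson::DamageVector names a sourceOffset into damageSource, a
// targetOffset into the record's data, and a size; the loop copies each damaged range
// through the recovery unit's writingPtr() so the write is covered by the storage engine's
// write-intent mechanism.  For example, a single damage of size 4 at targetOffset 8
// overwrites bytes 8..11 of the stored document.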
Status RecordStoreV1Base::updateWithDamages(OperationContext* txn,
                                            const RecordId& loc,
                                            const RecordData& oldRec,
                                            const char* damageSource,
                                            const mutablebson::DamageVector& damages) {
    Record* rec = recordFor(DiskLoc::fromRecordId(loc));
    char* root = rec->data();

    // All updates were in place. Apply them via durability and writing pointer.
    mutablebson::DamageVector::const_iterator where = damages.begin();
    const mutablebson::DamageVector::const_iterator end = damages.end();
    for (; where != end; ++where) {
        const char* sourcePtr = damageSource + where->sourceOffset;
        void* targetPtr = txn->recoveryUnit()->writingPtr(root + where->targetOffset, where->size);
        std::memcpy(targetPtr, sourcePtr, where->size);
    }

    return Status::OK();
}

void RecordStoreV1Base::deleteRecord(OperationContext* txn, const RecordId& rid) {
    const DiskLoc dl = DiskLoc::fromRecordId(rid);

    Record* todelete = recordFor(dl);
    invariant(todelete->netLength() >= 4);  // this is required for defensive code

    /* remove ourself from the record next/prev chain */
    {
        if (todelete->prevOfs() != DiskLoc::NullOfs) {
            DiskLoc prev = getPrevRecordInExtent(txn, dl);
            Record* prevRecord = recordFor(prev);
            txn->recoveryUnit()->writingInt(prevRecord->nextOfs()) = todelete->nextOfs();
        }

        if (todelete->nextOfs() != DiskLoc::NullOfs) {
            DiskLoc next = getNextRecord(txn, dl);
            Record* nextRecord = recordFor(next);
            txn->recoveryUnit()->writingInt(nextRecord->prevOfs()) = todelete->prevOfs();
        }
    }

    /* remove ourself from extent pointers */
    {
        DiskLoc extentLoc = todelete->myExtentLoc(dl);
        Extent* e = _getExtent(txn, extentLoc);
        if (e->firstRecord == dl) {
            txn->recoveryUnit()->writing(&e->firstRecord);
            if (todelete->nextOfs() == DiskLoc::NullOfs)
                e->firstRecord.Null();
            else
                e->firstRecord.set(dl.a(), todelete->nextOfs());
        }
        if (e->lastRecord == dl) {
            txn->recoveryUnit()->writing(&e->lastRecord);
            if (todelete->prevOfs() == DiskLoc::NullOfs)
                e->lastRecord.Null();
            else
                e->lastRecord.set(dl.a(), todelete->prevOfs());
        }
    }

    /* add to the free list */
    {
        _details->incrementStats(txn, -1 * todelete->netLength(), -1);
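        // Two disposal paths follow: records belonging to system.indexes are zeroed out and
        // not reused (see the comment below), while ordinary records are stamped with a 0xee
        // marker in their first data bytes and linked into the size-bucketed deleted list via
        // addDeletedRec() so their space can be reused.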
        if (_isSystemIndexes) {
            /* temp: if in system.indexes, don't reuse, and zero out: we want to be
               careful until validated more, as IndexDetails has pointers to this
               disk location.  so an incorrectly done remove would cause a lot of problems.
            */
            memset(txn->recoveryUnit()->writingPtr(todelete, todelete->lengthWithHeaders()),
                   0,
                   todelete->lengthWithHeaders());
        } else {
            // this is defensive so we can detect if we are still using a location
            // that was deleted
            memset(txn->recoveryUnit()->writingPtr(todelete->data(), 4), 0xee, 4);
            addDeletedRec(txn, dl);
        }
    }
}

RecordIterator* RecordStoreV1Base::getIteratorForRepair(OperationContext* txn) const {
    return new RecordStoreV1RepairIterator(txn, this);
}

void RecordStoreV1Base::_addRecordToRecListInExtent(OperationContext* txn, Record* r, DiskLoc loc) {
    dassert(recordFor(loc) == r);
    DiskLoc extentLoc = _getExtentLocForRecord(txn, loc);
    Extent* e = _getExtent(txn, extentLoc);
    if (e->lastRecord.isNull()) {
        *txn->recoveryUnit()->writing(&e->firstRecord) = loc;
        *txn->recoveryUnit()->writing(&e->lastRecord) = loc;
        r->prevOfs() = r->nextOfs() = DiskLoc::NullOfs;
    } else {
        Record* oldlast = recordFor(e->lastRecord);
        r->prevOfs() = e->lastRecord.getOfs();
        r->nextOfs() = DiskLoc::NullOfs;
        txn->recoveryUnit()->writingInt(oldlast->nextOfs()) = loc.getOfs();
        *txn->recoveryUnit()->writing(&e->lastRecord) = loc;
    }
}
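// increaseStorageSize() below grows the record store by one extent: it asks the ExtentManager
// for a new extent of roughly `size` bytes, links it to the tail of the extent chain (or makes
// it the first extent, patching xprev/xnext and lastExtent), and then turns the extent's entire
// usable body into a single deleted record via _findFirstSpot() so it immediately becomes
// available to allocRecord().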
void RecordStoreV1Base::increaseStorageSize(OperationContext* txn, int size, bool enforceQuota) {
    DiskLoc eloc = _extentManager->allocateExtent(txn, isCapped(), size, enforceQuota);
    Extent* e = _extentManager->getExtent(eloc);
    invariant(e);

    *txn->recoveryUnit()->writing(&e->nsDiagnostic) = _ns;

    txn->recoveryUnit()->writing(&e->xnext)->Null();
    txn->recoveryUnit()->writing(&e->xprev)->Null();
    txn->recoveryUnit()->writing(&e->firstRecord)->Null();
    txn->recoveryUnit()->writing(&e->lastRecord)->Null();

    DiskLoc emptyLoc = _findFirstSpot(txn, eloc, e);

    if (_details->lastExtent(txn).isNull()) {
        invariant(_details->firstExtent(txn).isNull());
        _details->setFirstExtent(txn, eloc);
        _details->setLastExtent(txn, eloc);
        _details->setCapExtent(txn, eloc);
        invariant(e->xprev.isNull());
        invariant(e->xnext.isNull());
    } else {
        invariant(!_details->firstExtent(txn).isNull());
        *txn->recoveryUnit()->writing(&e->xprev) = _details->lastExtent(txn);
        *txn->recoveryUnit()->writing(
            &_extentManager->getExtent(_details->lastExtent(txn))->xnext) = eloc;
        _details->setLastExtent(txn, eloc);
    }

    _details->setLastExtentSize(txn, e->length);

    addDeletedRec(txn, emptyLoc);
}

Status RecordStoreV1Base::validate(OperationContext* txn,
                                   bool full,
                                   bool scanData,
                                   ValidateAdaptor* adaptor,
                                   ValidateResults* results,
                                   BSONObjBuilder* output) {
    // 1) basic stats that require no iteration
    // 2) extent level info
    // 3) check extent start and end
    // 4) check each non-deleted record
    // 5) check deleted list

    // -------------

    // 1111111111111111111
    if (isCapped()) {
        output->appendBool("capped", true);
        output->appendNumber("max", _details->maxCappedDocs());
    }

    output->appendNumber("datasize", _details->dataSize());
    output->appendNumber("nrecords", _details->numRecords());
    output->appendNumber("lastExtentSize", _details->lastExtentSize(txn));

    if (_details->firstExtent(txn).isNull())
        output->append("firstExtent", "null");
    else
        output->append("firstExtent",
                       str::stream()
                           << _details->firstExtent(txn).toString() << " ns:"
                           << _getExtent(txn, _details->firstExtent(txn))->nsDiagnostic.toString());
    if (_details->lastExtent(txn).isNull())
        output->append("lastExtent", "null");
    else
        output->append("lastExtent",
                       str::stream()
                           << _details->lastExtent(txn).toString() << " ns:"
                           << _getExtent(txn, _details->lastExtent(txn))->nsDiagnostic.toString());

    // 22222222222222222222222222
    {
        // validate extent basics
        BSONArrayBuilder extentData;
        int extentCount = 0;
        DiskLoc extentDiskLoc;
        try {
            if (!_details->firstExtent(txn).isNull()) {
                _getExtent(txn, _details->firstExtent(txn))->assertOk();
                _getExtent(txn, _details->lastExtent(txn))->assertOk();
            }

            extentDiskLoc = _details->firstExtent(txn);
            while (!extentDiskLoc.isNull()) {
                Extent* thisExtent = _getExtent(txn, extentDiskLoc);
                if (full) {
                    extentData << thisExtent->dump();
                }
                if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
                    results->valid = false;
                }
                DiskLoc nextDiskLoc = thisExtent->xnext;

                if (extentCount > 0 && !nextDiskLoc.isNull() &&
                    _getExtent(txn, nextDiskLoc)->xprev != extentDiskLoc) {
                    StringBuilder sb;
                    sb << "'xprev' pointer " << _getExtent(txn, nextDiskLoc)->xprev.toString()
                       << " in extent " << nextDiskLoc.toString() << " does not point to extent "
                       << extentDiskLoc.toString();
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
                if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) {
                    StringBuilder sb;
                    sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString()
                       << " does not point to last extent in list " << extentDiskLoc.toString();
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
                extentDiskLoc = nextDiskLoc;
                extentCount++;
                txn->checkForInterrupt();
            }
        } catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception validating extent " << extentCount << ": " << e.what();
            results->errors.push_back(sb.str());
            results->valid = false;
            return Status::OK();
        }
        output->append("extentCount", extentCount);

        if (full)
            output->appendArray("extents", extentData.arr());
    }

    try {
        // 333333333333333333333333333
        bool testingLastExtent = false;
        try {
            DiskLoc firstExtentLoc = _details->firstExtent(txn);
            if (firstExtentLoc.isNull()) {
                // this is ok
            } else {
                output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump());
                if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) {
                    StringBuilder sb;
                    sb << "'xprev' pointer in 'firstExtent' "
                       << _details->firstExtent(txn).toString() << " is "
                       << _getExtent(txn, firstExtentLoc)->xprev.toString() << ", should be null";
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
            }
            testingLastExtent = true;
            DiskLoc lastExtentLoc = _details->lastExtent(txn);
            if (lastExtentLoc.isNull()) {
                // this is ok
            } else {
                if (firstExtentLoc != lastExtentLoc) {
                    output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump());
                    if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) {
                        StringBuilder sb;
                        sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
                           << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString()
                           << ", should be null";
                        results->errors.push_back(sb.str());
                        results->valid = false;
                    }
                }
            }
        } catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception processing '" << (testingLastExtent ? "lastExtent" : "firstExtent")
               << "': " << e.what();
            results->errors.push_back(sb.str());
            results->valid = false;
        }

        // 4444444444444444444444444
        set<DiskLoc> recs;
        if (scanData) {
            int n = 0;
            int nInvalid = 0;
            long long nQuantizedSize = 0;
            long long len = 0;
            long long nlen = 0;
            long long bsonLen = 0;
            int outOfOrder = 0;
            DiskLoc cl_last;

            scoped_ptr<RecordIterator> iterator(getIterator(txn));
            DiskLoc cl;
            while (!(cl = DiskLoc::fromRecordId(iterator->getNext())).isNull()) {
                n++;

                if (n < 1000000)
                    recs.insert(cl);

                if (isCapped()) {
                    if (cl < cl_last)
                        outOfOrder++;
                    cl_last = cl;
                }

                Record* r = recordFor(cl);
                len += r->lengthWithHeaders();
                nlen += r->netLength();
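                // Running totals for the scan: len counts bytes including record headers,
                // nlen counts user data only, and nQuantizedSize (below) counts records
                // whose on-disk size matches one of the quantized bucket sizes.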
                if (isQuantized(r->lengthWithHeaders())) {
                    // Count the number of records having a size consistent with
                    // the quantizeAllocationSpace quantization implementation.
                    ++nQuantizedSize;
                }

                if (full) {
                    size_t dataSize = 0;
                    const Status status = adaptor->validate(r->toRecordData(), &dataSize);
                    if (!status.isOK()) {
                        results->valid = false;
                        if (nInvalid == 0)  // only log once
                            results->errors.push_back("invalid object detected (see logs)");

                        nInvalid++;
                        log() << "Invalid object detected in " << _ns << ": " << status.reason();
                    } else {
                        bsonLen += dataSize;
                    }
                }
            }

            if (isCapped() && !_details->capLooped()) {
                output->append("cappedOutOfOrder", outOfOrder);
                if (outOfOrder > 1) {
                    results->valid = false;
                    results->errors.push_back("too many out of order records");
                }
            }
            output->append("objectsFound", n);

            if (full) {
                output->append("invalidObjects", nInvalid);
            }

            output->appendNumber("nQuantizedSize", nQuantizedSize);
            output->appendNumber("bytesWithHeaders", len);
            output->appendNumber("bytesWithoutHeaders", nlen);

            if (full) {
                output->appendNumber("bytesBson", bsonLen);
            }
        }  // end scanData

        // 55555555555555555555555555
        BSONArrayBuilder deletedListArray;
        for (int i = 0; i < Buckets; i++) {
            deletedListArray << _details->deletedListEntry(i).isNull();
        }

        int ndel = 0;
        long long delSize = 0;
        BSONArrayBuilder delBucketSizes;
        int incorrect = 0;
        for (int i = 0; i < Buckets; i++) {
            DiskLoc loc = _details->deletedListEntry(i);
            try {
                int k = 0;
                while (!loc.isNull()) {
                    if (recs.count(loc))
                        incorrect++;
                    ndel++;

                    if (loc.questionable()) {
                        if (isCapped() && !loc.isValid() && i == 1) {
                            /* the constructor for NamespaceDetails intentionally sets
                             * deletedList[1] to invalid; see comments in namespace.h
                             */
                            break;
                        }

                        string err(str::stream()
                                   << "bad pointer in deleted record list: " << loc.toString()
                                   << " bucket: " << i << " k: " << k);
                        results->errors.push_back(err);
                        results->valid = false;
                        break;
                    }

                    const DeletedRecord* d = deletedRecordFor(loc);
                    delSize += d->lengthWithHeaders();
                    loc = d->nextDeleted();
                    k++;
                    txn->checkForInterrupt();
                }
                delBucketSizes << k;
            } catch (...) {
                results->errors.push_back((string) "exception in deleted chain for bucket " +
                                          BSONObjBuilder::numStr(i));
                results->valid = false;
            }
        }
        output->appendNumber("deletedCount", ndel);
        output->appendNumber("deletedSize", delSize);
        if (full) {
            output->append("delBucketSizes", delBucketSizes.arr());
        }

        if (incorrect) {
            results->errors.push_back(BSONObjBuilder::numStr(incorrect) +
                                      " records from datafile are in deleted list");
            results->valid = false;
        }

    } catch (AssertionException) {
        results->errors.push_back("exception during validate");
        results->valid = false;
    }

    return Status::OK();
}

void RecordStoreV1Base::appendCustomStats(OperationContext* txn,
                                          BSONObjBuilder* result,
                                          double scale) const {
    result->append("lastExtentSize", _details->lastExtentSize(txn) / scale);
    result->append("paddingFactor", 1.0);  // hard coded
    result->append("paddingFactorNote",
                   "paddingFactor is unused and unmaintained in 3.0. It "
                   "remains hard coded to 1.0 for compatibility only.");
    result->append("userFlags", _details->userFlags());
    result->appendBool("capped", isCapped());
    if (isCapped()) {
        result->appendNumber("max", _details->maxCappedDocs());
        result->appendNumber("maxSize", static_cast<long long>(storageSize(txn, NULL, 0) / scale));
    }
}

namespace {
struct touch_location {
    const char* root;
    size_t length;
};
}
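// touch() below pre-warms the collection: it walks the extent chain, records each extent's
// starting address and length, and then calls touch_pages() on every range so the
// memory-mapped pages are faulted into RAM.  Progress is reported through a ProgressMeter,
// and the number of ranges plus elapsed milliseconds are returned in `output`.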
Status RecordStoreV1Base::touch(OperationContext* txn, BSONObjBuilder* output) const {
    Timer t;

    std::vector<touch_location> ranges;
    {
        DiskLoc nextLoc = _details->firstExtent(txn);
        Extent* ext = nextLoc.isNull() ? NULL : _getExtent(txn, nextLoc);
        while (ext) {
            touch_location tl;
            tl.root = reinterpret_cast<const char*>(ext);
            tl.length = ext->length;
            ranges.push_back(tl);

            nextLoc = ext->xnext;
            if (nextLoc.isNull())
                ext = NULL;
            else
                ext = _getExtent(txn, nextLoc);
        }
    }

    std::string progress_msg = "touch " + std::string(txn->getNS()) + " extents";
    ProgressMeterHolder pm(*txn->setMessage(progress_msg.c_str(), "Touch Progress", ranges.size()));

    for (std::vector<touch_location>::iterator it = ranges.begin(); it != ranges.end(); ++it) {
        touch_pages(it->root, it->length);
        pm.hit();
        txn->checkForInterrupt();
    }
    pm.finished();

    if (output) {
        output->append("numRanges", static_cast<int>(ranges.size()));
        output->append("millis", t.millis());
    }

    return Status::OK();
}

RecordId RecordStoreV1Base::IntraExtentIterator::getNext() {
    if (_curr.isNull())
        return RecordId();

    const DiskLoc out = _curr;  // we always return where we were, not where we will be.
    const Record* rec = recordFor(_curr);
    const int nextOfs = _forward ? rec->nextOfs() : rec->prevOfs();
    _curr = (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(_curr.a(), nextOfs));
    return out.toRecordId();
}

void RecordStoreV1Base::IntraExtentIterator::invalidate(const RecordId& rid) {
    if (rid == _curr.toRecordId()) {
        getNext();
    }
}

int RecordStoreV1Base::quantizeAllocationSpace(int allocSize) {
    invariant(allocSize <= MaxAllowedAllocation);
    for (int i = 0; i < Buckets - 2; i++) {  // last two bucketSizes are invalid
        if (bucketSizes[i] >= allocSize) {
            // Return the size of the first bucket sized >= the requested size.
            return bucketSizes[i];
        }
    }
    invariant(false);  // prior invariant means we should find something.
}

bool RecordStoreV1Base::isQuantized(int recordSize) {
    if (recordSize > MaxAllowedAllocation)
        return false;

    return recordSize == quantizeAllocationSpace(recordSize);
}

int RecordStoreV1Base::bucket(int size) {
    for (int i = 0; i < Buckets; i++) {
        if (bucketSizes[i] > size) {
            // Return the first bucket sized _larger_ than the requested size. This is important
            // since we want all records in a bucket to be >= the quantized size, therefore the
            // quantized size must be the smallest allowed record per bucket.
            return i;
        }
    }
    // Technically, this is reachable if size == INT_MAX, but it would be an error to pass that
    // in anyway since it would be impossible to have a record that large given the file and
    // extent headers.
    invariant(false);
}

}  // namespace mongo