diff options
Diffstat (limited to 'src')
13 files changed, 370 insertions, 84 deletions
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp index 970d6e29c71..537c01040f5 100644 --- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp +++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp @@ -75,7 +75,7 @@ namespace mongo { // For capped case, signal that we are doing initial extent allocation. if ( capped ) { // WAS: cappedLastDelRecLastExtent().setInvalid(); - deletedList[1].setInvalid(); + deletedListSmall[1].setInvalid(); } verify( sizeof(_dataFileVersion) == 2 ); _dataFileVersion = 0; diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h index 5afcd715128..e881c6017f9 100644 --- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h +++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h @@ -39,14 +39,6 @@ namespace mongo { class NamespaceIndex; class OperationContext; - /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes - so you can look for a deleterecord about the right size. - */ - const int Buckets = 19; - const int MaxBucket = 18; - - extern int bucketSizes[]; - #pragma pack(1) /* NamespaceDetails : this is the "header" for a collection that has all its details. It's in the .ns file and this is a memory mapped region (thus the pack pragma above). @@ -55,6 +47,11 @@ namespace mongo { public: enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 }; + // deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various + // sizes so you can look for a deleted record of about the right size. These buckets are + // split into small and large groups for compatibility with old versions. + static const int SmallBuckets = 18; + static const int LargeBuckets = 8; /*-------- data fields, as present on disk : */ @@ -69,7 +66,8 @@ namespace mongo { changes, this value is updated. !deletedList[1].isValid() when this value is not yet computed. */ - DiskLoc deletedList[Buckets]; + DiskLoc deletedListSmall[SmallBuckets]; + DiskLoc deletedListLegacyGrabBag; // old implementations put records of multiple sizes here. // ofs 168 (8 byte aligned) struct Stats { @@ -113,7 +111,12 @@ namespace mongo { int userFlags; - char _reserved[72]; + DiskLoc deletedListLarge[LargeBuckets]; + + // Think carefully before using this. We need at least 8 bytes reserved to leave room for a + // DiskLoc pointing to more data (eg in a dummy Record or Extent). There is still _reservedA + // above, but these are the final two reserved 8-byte regions. + char _reserved[8]; /*-------- end data 496 bytes */ public: explicit NamespaceDetails( const DiskLoc &loc, bool _capped ); diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp index b78ad7bef59..a444dc71dc3 100644 --- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp +++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp @@ -34,6 +34,9 @@ #include "mongo/db/ops/update.h" namespace mongo { + BOOST_STATIC_ASSERT(RecordStoreV1Base::Buckets + == NamespaceDetails::SmallBuckets + NamespaceDetails::LargeBuckets); + NamespaceDetailsRSV1MetaData::NamespaceDetailsRSV1MetaData( const StringData& ns, NamespaceDetails* details, RecordStore* namespaceRecordStore ) @@ -87,20 +90,44 @@ namespace mongo { s->nrecords = numRecords; } - const DiskLoc& NamespaceDetailsRSV1MetaData::deletedListEntry( int bucket ) const { - return _details->deletedList[ bucket ]; + DiskLoc NamespaceDetailsRSV1MetaData::deletedListEntry( int bucket ) const { + invariant(bucket >= 0 && bucket < RecordStoreV1Base::Buckets); + const DiskLoc head = (bucket < NamespaceDetails::SmallBuckets) + ? _details->deletedListSmall[bucket] + : _details->deletedListLarge[bucket - NamespaceDetails::SmallBuckets]; + + if (head == DiskLoc(0,0)) { + // This will happen the first time we use a "large" bucket since they were previously + // zero-initialized. + return DiskLoc(); + } + + return head; } void NamespaceDetailsRSV1MetaData::setDeletedListEntry( OperationContext* txn, int bucket, const DiskLoc& loc ) { - *txn->recoveryUnit()->writing( &_details->deletedList[bucket] ) = loc; + DiskLoc* head = (bucket < NamespaceDetails::SmallBuckets) + ? &_details->deletedListSmall[bucket] + : &_details->deletedListLarge[bucket - NamespaceDetails::SmallBuckets]; + *txn->recoveryUnit()->writing( head ) = loc; + } + + DiskLoc NamespaceDetailsRSV1MetaData::deletedListLegacyGrabBag() const { + return _details->deletedListLegacyGrabBag; + } + + void NamespaceDetailsRSV1MetaData::setDeletedListLegacyGrabBag(OperationContext* txn, + const DiskLoc& loc) { + *txn->recoveryUnit()->writing(&_details->deletedListLegacyGrabBag) = loc; } void NamespaceDetailsRSV1MetaData::orphanDeletedList( OperationContext* txn ) { - for( int i = 0; i < Buckets; i++ ) { + for( int i = 0; i < RecordStoreV1Base::Buckets; i++ ) { setDeletedListEntry( txn, i, DiskLoc() ); } + setDeletedListLegacyGrabBag(txn, DiskLoc()); } const DiskLoc& NamespaceDetailsRSV1MetaData::firstExtent( OperationContext* txn ) const { diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h index 609691ec5ed..61421a34644 100644 --- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h +++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h @@ -71,10 +71,14 @@ namespace mongo { long long dataSize, long long numRecords ); - virtual const DiskLoc& deletedListEntry( int bucket ) const; + virtual DiskLoc deletedListEntry( int bucket ) const; virtual void setDeletedListEntry( OperationContext* txn, int bucket, const DiskLoc& loc ); + + virtual DiskLoc deletedListLegacyGrabBag() const; + virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc); + virtual void orphanDeletedList(OperationContext* txn); virtual const DiskLoc& firstExtent( OperationContext* txn ) const; diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp index a7ea59aea52..14ba213cfca 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp @@ -45,20 +45,26 @@ namespace mongo { - const int RecordStoreV1Base::Buckets = 19; - const int RecordStoreV1Base::MaxBucket = 18; - /* Deleted list buckets are used to quickly locate free space based on size. Each bucket - contains records up to that size. All records >= 4mb are placed into the 16mb bucket. + contains records up to that size (meaning a record with a size exactly equal to + bucketSizes[n] would go into bucket n+1). */ const int RecordStoreV1Base::bucketSizes[] = { - 0x20, 0x40, 0x80, 0x100, // 32, 64, 128, 256 - 0x200, 0x400, 0x800, 0x1000, // 512, 1K, 2K, 4K - 0x2000, 0x4000, 0x8000, 0x10000, // 8K, 16K, 32K, 64K - 0x20000, 0x40000, 0x80000, 0x100000, // 128K, 256K, 512K, 1M - 0x200000, 0x400000, 0x1000000, // 2M, 4M, 16M (see above) + 0x20, 0x40, 0x80, 0x100, // 32, 64, 128, 256 + 0x200, 0x400, 0x800, 0x1000, // 512, 1K, 2K, 4K + 0x2000, 0x4000, 0x8000, 0x10000, // 8K, 16K, 32K, 64K + 0x20000, 0x40000, 0x80000, 0x100000, // 128K, 256K, 512K, 1M + 0x200000, 0x400000, 0x600000, 0x800000, // 2M, 4M, 6M, 8M + 0xA00000, 0xC00000, 0xE00000, // 10M, 12M, 14M, + MaxAllowedAllocation, // 16.5M + MaxAllowedAllocation + 1, // Only MaxAllowedAllocation sized records go here. + INT_MAX, // "oversized" bucket for unused parts of extents. }; + // If this fails, it means that bucketSizes doesn't have the correct number of entries. + BOOST_STATIC_ASSERT(sizeof(RecordStoreV1Base::bucketSizes) + / sizeof(RecordStoreV1Base::bucketSizes[0]) + == RecordStoreV1Base::Buckets); RecordStoreV1Base::RecordStoreV1Base( const StringData& ns, RecordStoreV1MetaData* details, @@ -241,6 +247,10 @@ namespace mongo { "record has to be >= 4 bytes" ); } int lenWHdr = docSize + Record::HeaderSize; + if ( lenWHdr > MaxAllowedAllocation ) { + return StatusWith<DiskLoc>( ErrorCodes::InvalidLength, + "record has to be <= 16.5MB" ); + } if (doc->addPadding() && !isCapped()) lenWHdr = quantizeAllocationSpace( lenWHdr ); @@ -271,6 +281,11 @@ namespace mongo { "record has to be >= 4 bytes" ); } + if ( len + Record::HeaderSize > MaxAllowedAllocation ) { + return StatusWith<DiskLoc>( ErrorCodes::InvalidLength, + "record has to be <= 16.5MB" ); + } + return _insertRecord( txn, data, len, enforceQuota ); } @@ -321,6 +336,10 @@ namespace mongo { 10003 ); // we have to move + if ( dataSize + Record::HeaderSize > MaxAllowedAllocation ) { + return StatusWith<DiskLoc>( ErrorCodes::InvalidLength, + "record has to be <= 16.5MB" ); + } StatusWith<DiskLoc> newLocation = _insertRecord( txn, data, dataSize, enforceQuota ); if ( !newLocation.isOK() ) @@ -663,8 +682,7 @@ namespace mongo { len += r->lengthWithHeaders(); nlen += r->netLength(); - if ( r->lengthWithHeaders() == - quantizeAllocationSpace( r->lengthWithHeaders() ) ) { + if ( isQuantized( r->lengthWithHeaders() ) ) { // Count the number of records having a size consistent with // the quantizeAllocationSpace quantization implementation. ++nQuantizedSize; @@ -864,16 +882,21 @@ namespace mongo { } int RecordStoreV1Base::quantizeAllocationSpace(int allocSize) { - for ( int i = 0; i < Buckets; i++ ) { + invariant(allocSize <= MaxAllowedAllocation); + for ( int i = 0; i < Buckets - 2; i++ ) { // last two bucketSizes are invalid if ( bucketSizes[i] >= allocSize ) { // Return the size of the first bucket sized >= the requested size. return bucketSizes[i]; } } + invariant(false); // prior invariant means we should find something. + } + + bool RecordStoreV1Base::isQuantized(int recordSize) { + if (recordSize > MaxAllowedAllocation) + return false; - // TODO make a specific bucket large enough to hold all documents rather than doing this. - invariant(allocSize < bucketSizes[MaxBucket] + 1024*1024); - return bucketSizes[MaxBucket] + 1024*1024; + return recordSize == quantizeAllocationSpace(recordSize); } int RecordStoreV1Base::bucket(int size) { @@ -885,7 +908,10 @@ namespace mongo { return i; } } - return MaxBucket; + // Technically, this is reachable if size == INT_MAX, but it would be an error to pass that + // in anyway since it would be impossible to have a record that large given the file and + // extent headers. + invariant(false); } Status RecordStoreV1Base::setCustomOption( OperationContext* txn, diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_base.h b/src/mongo/db/storage/mmap_v1/record_store_v1_base.h index 8c1141b0ec4..d2b4a451648 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_base.h +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_base.h @@ -66,10 +66,14 @@ namespace mongo { long long dataSize, long long numRecords ) = 0; - virtual const DiskLoc& deletedListEntry( int bucket ) const = 0; + virtual DiskLoc deletedListEntry( int bucket ) const = 0; virtual void setDeletedListEntry( OperationContext* txn, int bucket, const DiskLoc& loc ) = 0; + + virtual DiskLoc deletedListLegacyGrabBag() const = 0; + virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc) = 0; + virtual void orphanDeletedList(OperationContext* txn) = 0; virtual const DiskLoc& firstExtent( OperationContext* txn ) const = 0; @@ -96,8 +100,8 @@ namespace mongo { class RecordStoreV1Base : public RecordStore { public: - static const int Buckets; - static const int MaxBucket; + static const int Buckets = 26; + static const int MaxAllowedAllocation = 16*1024*1024 + 512*1024; static const int bucketSizes[]; @@ -186,6 +190,8 @@ namespace mongo { */ static int quantizeAllocationSpace(int minSize); + static bool isQuantized(int recordSize); + /* return which "deleted bucket" for this size object */ static int bucket(int size); diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp index 8d6a0672a43..c6e7cfe6cef 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp @@ -310,7 +310,7 @@ namespace mongo { } - const DiskLoc &CappedRecordStoreV1::cappedFirstDeletedInCurExtent() const { + DiskLoc CappedRecordStoreV1::cappedFirstDeletedInCurExtent() const { if ( cappedLastDelRecLastExtent().isNull() ) return cappedListOfAllDeletedRecords(); else @@ -549,7 +549,7 @@ namespace mongo { } } - const DiskLoc& CappedRecordStoreV1::cappedListOfAllDeletedRecords() const { + DiskLoc CappedRecordStoreV1::cappedListOfAllDeletedRecords() const { return _details->deletedListEntry(0); } @@ -558,7 +558,7 @@ namespace mongo { return _details->setDeletedListEntry(txn, 0, loc); } - const DiskLoc& CappedRecordStoreV1::cappedLastDelRecLastExtent() const { + DiskLoc CappedRecordStoreV1::cappedLastDelRecLastExtent() const { return _details->deletedListEntry(1); } diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h index 4422b5d451b..83979f3acab 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h @@ -99,13 +99,13 @@ namespace mongo { private: // -- start copy from cap.cpp -- void compact(OperationContext* txn); - const DiskLoc& cappedFirstDeletedInCurExtent() const; + DiskLoc cappedFirstDeletedInCurExtent() const; void setFirstDeletedInCurExtent( OperationContext* txn, const DiskLoc& loc ); void cappedCheckMigrate(OperationContext* txn); DiskLoc __capAlloc( OperationContext* txn, int len ); bool inCapExtent( const DiskLoc &dl ) const; - const DiskLoc& cappedListOfAllDeletedRecords() const; - const DiskLoc& cappedLastDelRecLastExtent() const; + DiskLoc cappedListOfAllDeletedRecords() const; + DiskLoc cappedLastDelRecLastExtent() const; void setListOfAllDeletedRecords( OperationContext* txn, const DiskLoc& loc ); void setLastDelRecLastExtent( OperationContext* txn, const DiskLoc& loc ); Extent *theCapExtent() const; diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp index 78b8b60b159..f89d727727b 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp @@ -116,7 +116,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } rs.insertRecord(&txn, zeros, 100 - Record::HeaderSize, false); @@ -130,7 +130,7 @@ namespace { {DiskLoc(0, 1100), 900}, {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc().setInvalid()); // unlooped } @@ -158,7 +158,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } rs.insertRecord(&txn, zeros, 100 - Record::HeaderSize, false); @@ -176,7 +176,7 @@ namespace { {DiskLoc(0, 1500), 50}, // gap at end of extent {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000)); } @@ -204,7 +204,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000)); - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } rs.insertRecord(&txn, zeros, 100 - Record::HeaderSize, false); @@ -222,7 +222,7 @@ namespace { {DiskLoc(0, 1500), 50}, // gap at end of extent {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000)); } @@ -253,7 +253,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000)); - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } rs.insertRecord(&txn, zeros, 100 - Record::HeaderSize, false); @@ -271,7 +271,7 @@ namespace { {DiskLoc(0, 1500), 123}, // gap at end of extent {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000)); } @@ -299,7 +299,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000)); - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } rs.insertRecord(&txn, zeros, 100 - Record::HeaderSize, false); @@ -318,7 +318,7 @@ namespace { {DiskLoc(0, 1600), 24}, // gap at end of extent {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000)); } @@ -346,7 +346,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } rs.insertRecord(&txn, zeros, 100 - Record::HeaderSize, false); @@ -365,7 +365,7 @@ namespace { {DiskLoc(1, 1100), 900}, {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(1, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc().setInvalid()); // unlooped } @@ -396,7 +396,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000)); - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } rs.insertRecord(&txn, zeros, 200 - Record::HeaderSize, false); @@ -416,7 +416,7 @@ namespace { {DiskLoc(1, 1900), 100}, {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(1, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(1, 1000)); } @@ -449,7 +449,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } rs.insertRecord(&txn, zeros, 500 - Record::HeaderSize, false); @@ -471,7 +471,7 @@ namespace { {DiskLoc(0, 1920), 80}, {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000)); } @@ -499,7 +499,7 @@ namespace { }; md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped - initializeV1RS(&txn, records, drecs, &em, md); + initializeV1RS(&txn, records, drecs, NULL, &em, md); } // This list of sizes was empirically generated to achieve this outcome. Don't think too @@ -553,7 +553,7 @@ namespace { {DiskLoc(0, 1628), 84}, {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0)); ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000)); } @@ -584,7 +584,7 @@ namespace { md->setCapExtent(&txn, DiskLoc(0, 0)); md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped - initializeV1RS(&txn, recs, drecs, &em, md); + initializeV1RS(&txn, recs, drecs, NULL, &em, md); } // Insert bypasses standard alloc/insert routines to use the extent we want. diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp index 4b8a51e9e87..b4f6f86646f 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp @@ -45,6 +45,7 @@ #include "mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h" #include "mongo/util/log.h" #include "mongo/util/progress_meter.h" +#include "mongo/util/mongoutils/str.h" #include "mongo/util/timer.h" #include "mongo/util/touch_pages.h" @@ -80,6 +81,20 @@ namespace mongo { DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents( OperationContext* txn, int lenToAllocRaw ) { + + // Slowly drain the deletedListLegacyGrabBag by popping one record off and putting it in the + // correct deleted list each time we try to allocate a new record. This ensures we won't + // orphan any data when upgrading from old versions, without needing a long upgrade phase. + // This is done before we try to allocate the new record so we can take advantage of the new + // space immediately. + { + const DiskLoc head = _details->deletedListLegacyGrabBag(); + if (!head.isNull()) { + _details->setDeletedListLegacyGrabBag(txn, drec(head)->nextDeleted()); + addDeletedRec(txn, head); + } + } + // align size up to a multiple of 4 const int lenToAlloc = (lenToAllocRaw + (4-1)) & ~(4-1); @@ -132,6 +147,13 @@ namespace mongo { StatusWith<DiskLoc> SimpleRecordStoreV1::allocRecord( OperationContext* txn, int lengthWithHeaders, bool enforceQuota ) { + if (lengthWithHeaders > MaxAllowedAllocation) { + return StatusWith<DiskLoc>( + ErrorCodes::InvalidLength, + str::stream() << "Attempting to allocate a record larger than maximum size: " + << lengthWithHeaders << " > 16.5MB"); + } + DiskLoc loc = _allocFromExistingExtents( txn, lengthWithHeaders ); if ( !loc.isNull() ) return StatusWith<DiskLoc>( loc ); diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp index 6b5dd87bf1d..7451a53599a 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp @@ -46,24 +46,26 @@ namespace { ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10001), 16*1024); ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(100000), 128*1024); ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1000001), 1024*1024); - ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10000000), 16*1024*1024); - ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(16*1024*1024), 16*1024*1024); - ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(16*1024*1024 + 1), 17*1024*1024); + ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10000000), 10*1024*1024); + ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14*1024*1024 - 1), 14*1024*1024); + ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14*1024*1024), 14*1024*1024); + ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14*1024*1024 + 1), + 16*1024*1024 + 512*1024); + ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(16*1024*1024 + 512*1024), + 16*1024*1024 + 512*1024); } TEST( SimpleRecordStoreV1, quantizeAllocationMinMaxBound ) { - const int maxSize = 16 * 1024 * 1024; + const int maxSize = RecordStoreV1Base::MaxAllowedAllocation; ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1), 32); ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(maxSize), maxSize); } /** - * For buckets up to 4MB powerOf2 allocation should round up to next power of 2. It should be - * return the input unmodified if it is already a power of 2. + * Tests quantization of sizes around all valid bucket sizes. */ - TEST( SimpleRecordStoreV1, quantizePowerOf2Small ) { - // only tests buckets <= 4MB. Higher buckets quatize to 1MB even with powerOf2 - for (int bucket = 0; bucket < RecordStoreV1Base::MaxBucket; bucket++) { + TEST( SimpleRecordStoreV1, quantizeAroundBucketSizes ) { + for (int bucket = 0; bucket < RecordStoreV1Base::Buckets - 2; bucket++) { const int size = RecordStoreV1Base::bucketSizes[bucket]; const int nextSize = RecordStoreV1Base::bucketSizes[bucket + 1]; @@ -75,8 +77,8 @@ namespace { ASSERT_EQUALS( size, RecordStoreV1Base::quantizeAllocationSpace( size ) ); - // size + 1 is quantized to nextSize (unless > 4MB which is covered by next test) - if (size < 4*1024*1024) { + // size + 1 is quantized to nextSize (if it is a valid allocation) + if (size + 1 <= RecordStoreV1Base::MaxAllowedAllocation) { ASSERT_EQUALS( nextSize, RecordStoreV1Base::quantizeAllocationSpace( size + 1 ) ); } @@ -188,7 +190,7 @@ namespace { {DiskLoc(0, 1000), 512 + 31}, {} }; - initializeV1RS(&txn, NULL, drecs, &em, md); + initializeV1RS(&txn, NULL, drecs, NULL, &em, md); } BsonDocWriter docWriter(docForRecordSize( 300 ), true); @@ -203,7 +205,7 @@ namespace { LocAndSize drecs[] = { {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); } } @@ -221,7 +223,7 @@ namespace { {DiskLoc(0, 1000), 512 + 32}, {} }; - initializeV1RS(&txn, NULL, drecs, &em, md); + initializeV1RS(&txn, NULL, drecs, NULL, &em, md); } BsonDocWriter docWriter(docForRecordSize( 300 ), true); @@ -237,7 +239,7 @@ namespace { {DiskLoc(0, 1512), 32}, {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); } } @@ -255,7 +257,7 @@ namespace { {DiskLoc(0, 1000), 331}, {} }; - initializeV1RS(&txn, NULL, drecs, &em, md); + initializeV1RS(&txn, NULL, drecs, NULL, &em, md); } BsonDocWriter docWriter(docForRecordSize( 300 ), false); @@ -270,7 +272,7 @@ namespace { LocAndSize drecs[] = { {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); } } @@ -288,7 +290,7 @@ namespace { {DiskLoc(0, 1000), 332}, {} }; - initializeV1RS(&txn, NULL, drecs, &em, md); + initializeV1RS(&txn, NULL, drecs, NULL, &em, md); } BsonDocWriter docWriter(docForRecordSize( 300 ), false); @@ -304,7 +306,135 @@ namespace { {DiskLoc(0, 1300), 32}, {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); + } + } + + /** + * alloc() will use from the legacy grab bag if it can. + */ + TEST(SimpleRecordStoreV1, GrabBagIsUsed) { + OperationContextNoop txn; + DummyExtentManager em; + DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 ); + SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false ); + + { + LocAndSize drecs[] = { + {} + }; + LocAndSize grabBag[] = { + {DiskLoc(0, 1000), 4*1024*1024}, + {DiskLoc(1, 1000), 4*1024*1024}, + {} + }; + initializeV1RS(&txn, NULL, drecs, grabBag, &em, md); + } + + BsonDocWriter docWriter(docForRecordSize( 256 ), false); + StatusWith<DiskLoc> actualLocation = rs.insertRecord(&txn, &docWriter, false); + ASSERT_OK( actualLocation.getStatus() ); + + { + LocAndSize recs[] = { + {DiskLoc(0, 1000), 256}, + {} + }; + LocAndSize drecs[] = { + {DiskLoc(0, 1256), 4*1024*1024 - 256}, + {} + }; + LocAndSize grabBag[] = { + {DiskLoc(1, 1000), 4*1024*1024}, + {} + }; + assertStateV1RS(&txn, recs, drecs, grabBag, &em, md); + } + } + + /** + * alloc() will pull from the legacy grab bag even if it isn't needed. + */ + TEST(SimpleRecordStoreV1, GrabBagIsPoppedEvenIfUnneeded) { + OperationContextNoop txn; + DummyExtentManager em; + DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 ); + SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false ); + + { + LocAndSize drecs[] = { + {DiskLoc(0, 1000), 1000}, + {} + }; + LocAndSize grabBag[] = { + {DiskLoc(1, 1000), 4*1024*1024}, + {DiskLoc(2, 1000), 4*1024*1024}, + {} + }; + initializeV1RS(&txn, NULL, drecs, grabBag, &em, md); + } + + BsonDocWriter docWriter(docForRecordSize( 1000 ), false); + StatusWith<DiskLoc> actualLocation = rs.insertRecord(&txn, &docWriter, false); + ASSERT_OK( actualLocation.getStatus() ); + + { + LocAndSize recs[] = { + {DiskLoc(0, 1000), 1000}, + {} + }; + LocAndSize drecs[] = { + {DiskLoc(1, 1000), 4*1024*1024}, + {} + }; + LocAndSize grabBag[] = { + {DiskLoc(2, 1000), 4*1024*1024}, + {} + }; + assertStateV1RS(&txn, recs, drecs, grabBag, &em, md); + } + } + + /** + * alloc() will pull from the legacy grab bag even if it can't be used + */ + TEST(SimpleRecordStoreV1, GrabBagIsPoppedEvenIfUnusable) { + OperationContextNoop txn; + DummyExtentManager em; + DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 ); + SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false ); + + { + LocAndSize drecs[] = { + {DiskLoc(0, 1000), 8*1024*1024}, + {} + }; + LocAndSize grabBag[] = { + {DiskLoc(1, 1000), 4*1024*1024}, + {DiskLoc(2, 1000), 4*1024*1024}, + {} + }; + initializeV1RS(&txn, NULL, drecs, grabBag, &em, md); + } + + BsonDocWriter docWriter(docForRecordSize( 8*1024*1024 ), false); + StatusWith<DiskLoc> actualLocation = rs.insertRecord(&txn, &docWriter, false); + ASSERT_OK( actualLocation.getStatus() ); + + { + LocAndSize recs[] = { + {DiskLoc(0, 1000), 8*1024*1024}, + {} + }; + LocAndSize drecs[] = { + {DiskLoc(1, 1000), 4*1024*1024}, + {} + }; + LocAndSize grabBag[] = { + {DiskLoc(2, 1000), 4*1024*1024}, + {} + }; + assertStateV1RS(&txn, recs, drecs, grabBag, &em, md); } } @@ -352,7 +482,7 @@ namespace { {} }; - initializeV1RS(&txn, recs, drecs, &em, md); + initializeV1RS(&txn, recs, drecs, NULL, &em, md); ASSERT_EQUALS(em.getExtent(DiskLoc(0, 0))->length, em.minSize()); } @@ -368,7 +498,7 @@ namespace { {DiskLoc(0, Extent::HeaderSize()), em.minSize() - Extent::HeaderSize()}, {} }; - assertStateV1RS(&txn, recs, drecs, &em, md); + assertStateV1RS(&txn, recs, drecs, NULL, &em, md); } } } diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp index 50a34ac040d..879278bc4b8 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp @@ -104,7 +104,7 @@ namespace mongo { DiskLoc myNull; } - const DiskLoc& DummyRecordStoreV1MetaData::deletedListEntry( int bucket ) const { + DiskLoc DummyRecordStoreV1MetaData::deletedListEntry( int bucket ) const { invariant( bucket >= 0 ); if ( static_cast<size_t>( bucket ) >= _deletedLists.size() ) return myNull; @@ -121,6 +121,15 @@ namespace mongo { _deletedLists[bucket] = loc; } + DiskLoc DummyRecordStoreV1MetaData::deletedListLegacyGrabBag() const { + return _deletedListLegacyGrabBag; + } + + void DummyRecordStoreV1MetaData::setDeletedListLegacyGrabBag(OperationContext* txn, + const DiskLoc& loc) { + _deletedListLegacyGrabBag = loc; + } + void DummyRecordStoreV1MetaData::orphanDeletedList(OperationContext* txn) { // They will be recreated on demand. _deletedLists.clear(); @@ -367,6 +376,7 @@ namespace { void initializeV1RS(OperationContext* txn, const LocAndSize* records, const LocAndSize* drecs, + const LocAndSize* legacyGrabBag, DummyExtentManager* em, DummyRecordStoreV1MetaData* md) { invariant(records || drecs); // if both are NULL nothing is being created... @@ -381,6 +391,7 @@ namespace { ExtentSizes extentSizes; accumulateExtentSizeRequirements(records, &extentSizes); accumulateExtentSizeRequirements(drecs, &extentSizes); + accumulateExtentSizeRequirements(legacyGrabBag, &extentSizes); invariant(!extentSizes.empty()); const int maxExtent = extentSizes.rbegin()->first; @@ -493,13 +504,41 @@ namespace { } } + if (legacyGrabBag && !legacyGrabBag[0].loc.isNull()) { + invariant(!md->isCapped()); // capped should have an empty legacy grab bag. + + int grabBagIdx = 0; + DiskLoc* prevNextPtr = NULL; + while (!legacyGrabBag[grabBagIdx].loc.isNull()) { + const DiskLoc loc = legacyGrabBag[grabBagIdx].loc; + const int size = legacyGrabBag[grabBagIdx].size; + invariant(size >= Record::HeaderSize); + + if (grabBagIdx == 0) { + md->setDeletedListLegacyGrabBag(txn, loc); + } + else { + *prevNextPtr = loc; + } + + DeletedRecord* drec = &em->recordForV1(loc)->asDeleted(); + drec->lengthWithHeaders() = size; + drec->extentOfs() = 0; + drec->nextDeleted() = DiskLoc(); + prevNextPtr = &drec->nextDeleted(); + + grabBagIdx++; + } + } + // Make sure we set everything up as requested. - assertStateV1RS(txn, records, drecs, em, md); + assertStateV1RS(txn, records, drecs, legacyGrabBag, em, md); } void assertStateV1RS(OperationContext* txn, const LocAndSize* records, const LocAndSize* drecs, + const LocAndSize* legacyGrabBag, const ExtentManager* em, const DummyRecordStoreV1MetaData* md) { invariant(records || drecs); // if both are NULL nothing is being asserted... @@ -597,6 +636,28 @@ namespace { // both the expected and actual deleted lists must be done at this point ASSERT_EQUALS(drecs[drecIdx].loc, DiskLoc()); } + + if (legacyGrabBag) { + int grabBagIdx = 0; + DiskLoc actualLoc = md->deletedListLegacyGrabBag(); + while (!actualLoc.isNull()) { + const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted(); + const int actualSize = actualDrec->lengthWithHeaders(); + + ASSERT_EQUALS(actualLoc, legacyGrabBag[grabBagIdx].loc); + ASSERT_EQUALS(actualSize, legacyGrabBag[grabBagIdx].size); + + grabBagIdx++; + actualLoc = actualDrec->nextDeleted(); + } + + // both the expected and actual deleted lists must be done at this point + ASSERT_EQUALS(legacyGrabBag[grabBagIdx].loc, DiskLoc()); + } + else { + // Unless a test is actually using the grabBag it should be empty + ASSERT_EQUALS(md->deletedListLegacyGrabBag(), DiskLoc()); + } } catch (...) { // If a test fails, provide extra info to make debugging easier diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h index 352c91efd16..536f8c23e96 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h @@ -59,10 +59,14 @@ namespace mongo { long long dataSize, long long numRecords ); - virtual const DiskLoc& deletedListEntry( int bucket ) const; + virtual DiskLoc deletedListEntry( int bucket ) const; virtual void setDeletedListEntry( OperationContext* txn, int bucket, const DiskLoc& loc ); + + virtual DiskLoc deletedListLegacyGrabBag() const; + virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc); + virtual void orphanDeletedList(OperationContext* txn); virtual const DiskLoc& firstExtent( OperationContext* txn ) const; @@ -104,6 +108,7 @@ namespace mongo { double _paddingFactor; std::vector<DiskLoc> _deletedLists; + DiskLoc _deletedListLegacyGrabBag; }; class DummyExtentManager : public ExtentManager { @@ -177,6 +182,7 @@ namespace mongo { void initializeV1RS(OperationContext* txn, const LocAndSize* records, const LocAndSize* drecs, + const LocAndSize* legacyGrabBag, DummyExtentManager* em, DummyRecordStoreV1MetaData* md); @@ -190,6 +196,7 @@ namespace mongo { void assertStateV1RS(OperationContext* txn, const LocAndSize* records, const LocAndSize* drecs, + const LocAndSize* legacyGrabBag, const ExtentManager* em, const DummyRecordStoreV1MetaData* md); |