author     Alberto Lerner <alerner@10gen.com>    2010-09-27 16:53:20 -0400
committer  Alberto Lerner <alerner@10gen.com>    2010-09-27 16:53:20 -0400
commit     f1afbf0f4e52d0b4fc487d41b1c8bc8743d89092 (patch)
tree       fa56f688e4306c407dff47b7c4f038d5f1a9e93a
parent     4b56d14a15baf9853df4c74a5fcfa0c32845842b (diff)
parent     6309d60b49739fdd22c70b8d255399b3281812be (diff)
download   mongo-f1afbf0f4e52d0b4fc487d41b1c8bc8743d89092.tar.gz
Merge branch 'master' of github.com:mongodb/mongo
-rw-r--r--  db/btree.cpp                   51
-rw-r--r--  db/btree.h                      5
-rw-r--r--  db/cap.cpp                     46
-rw-r--r--  db/clientcursor.h              19
-rw-r--r--  db/dbcommands.cpp              26
-rw-r--r--  db/dbcommands_admin.cpp         4
-rw-r--r--  db/diskloc.h                    2
-rw-r--r--  db/dur.cpp                     32
-rw-r--r--  db/dur.h                       68
-rw-r--r--  db/namespace.cpp               53
-rw-r--r--  db/namespace.h                 21
-rw-r--r--  db/pdfile.cpp                 102
-rw-r--r--  db/pdfile.h                    20
-rw-r--r--  db/query.cpp                    2
-rw-r--r--  db/rec.h                        9
-rw-r--r--  db/repl/manager.cpp             7
-rw-r--r--  db/repl/rs.h                    2
-rw-r--r--  db/repl/rs_initialsync.cpp      2
-rw-r--r--  db/repl/rs_rollback.cpp         2
-rwxr-xr-x  dbtests/mmaptests.cpp           3
-rw-r--r--  dbtests/namespacetests.cpp     10
-rw-r--r--  dbtests/test.vcxproj            3
-rwxr-xr-x  dbtests/test.vcxproj.filters    3
-rw-r--r--  jstests/evald.js                2
-rw-r--r--  jstests/geo_update2.js         40
-rw-r--r--  s/d_split.cpp                   4
-rw-r--r--  util/hashtab.h                 28
-rw-r--r--  util/message.h                  8
-rw-r--r--  util/mmap.h                    16
-rw-r--r--  util/mmap_win.cpp             123
30 files changed, 478 insertions(+), 235 deletions(-)
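
Most of the churn below is a mechanical rename: the nrecords and datasize counters of
NamespaceDetails move into a nested Stats struct so that a single dur::writing(&d->stats)
call can declare a write covering both. A standalone C++ sketch (not part of the commit)
of the layout assumption the new comment in db/namespace.h calls out:

    #include <cassert>
    #include <cstddef>

    // stand-in for the nested struct added to db/namespace.h
    struct Stats {
        long long datasize;   // "datasize and nrecords MUST Be adjacent code assumes!"
        long long nrecords;
    };

    int main() {
        // one declared-write region of sizeof(Stats) bytes covers every stats
        // update, but only because the two fields are contiguous in memory
        assert( offsetof(Stats, nrecords) == sizeof(long long) );
        assert( sizeof(Stats) == 2 * sizeof(long long) );
        return 0;
    }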
diff --git a/db/btree.cpp b/db/btree.cpp
index 1528951a047..43b155aa16d 100644
--- a/db/btree.cpp
+++ b/db/btree.cpp
@@ -31,6 +31,12 @@ namespace mongo {
 
 #define VERIFYTHISLOC dassert( thisLoc.btree() == this );
 
+    BtreeBucket* DiskLoc::btreemod() const {
+        assert( _a != -1 );
+        BtreeBucket *b = (BtreeBucket*) btreeStore->get(*this, BucketSize);
+        return dur::writing(b);
+    }
+
     KeyNode::KeyNode(const BucketBasics& bb, const _KeyNode &k) :
         prevChildBucket(k.prevChildBucket),
         recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs())
@@ -50,8 +56,8 @@ namespace mongo {
     /* BucketBasics --------------------------------------------------- */
 
     inline void BucketBasics::modified(const DiskLoc& thisLoc) {
-        VERIFYTHISLOC
-        btreeStore->modified(thisLoc);
+//        VERIFYTHISLOC
+//        btreeStore->modified(thisLoc);
     }
 
     int BucketBasics::Size() const {
@@ -222,6 +228,7 @@ namespace mongo {
        the keynodes grow from the front.
     */
     inline int BucketBasics::_alloc(int bytes) {
+        dur::assertWriting(this);
         topSize += bytes;
         emptySize -= bytes;
         int ofs = totalDataSize() - topSize;
@@ -284,7 +291,6 @@ namespace mongo {
 
     /* insert a key in a bucket with no complexity -- no splits required */
     bool BucketBasics::basicInsert(const DiskLoc& thisLoc, int &keypos, const DiskLoc& recordLoc, const BSONObj& key, const Ordering &order) {
-        modified(thisLoc);
         assert( keypos >= 0 && keypos <= n );
         int bytesNeeded = key.objsize() + sizeof(_KeyNode);
         if ( bytesNeeded > emptySize ) {
@@ -294,13 +300,16 @@ namespace mongo {
         }
         for ( int j = n; j > keypos; j-- ) // make room
             k(j) = k(j-1);
-        n++;
-        emptySize -= sizeof(_KeyNode);
-        _KeyNode& kn = k(keypos);
+
+        BucketBasics *b = this;//dur::writing(this);
+
+        b->n++;
+        b->emptySize -= sizeof(_KeyNode);
+        _KeyNode& kn = b->k(keypos);
         kn.prevChildBucket.Null();
         kn.recordLoc = recordLoc;
-        kn.setKeyDataOfs((short) _alloc(key.objsize()) );
-        char *p = dataAt(kn.keyDataOfs());
+        kn.setKeyDataOfs((short) b->_alloc(key.objsize()) );
+        char *p = b->dataAt(kn.keyDataOfs());
         memcpy(p, key.objdata(), key.objsize());
         return true;
     }
@@ -633,7 +642,7 @@ found:
         bool found;
         DiskLoc loc = locate(id, thisLoc, key, Ordering::make(id.keyPattern()), pos, found, recordLoc, 1);
         if ( found ) {
-            loc.btree()->delKeyAtPos(loc, id, pos);
+            loc.btreemod()->delKeyAtPos(loc, id, pos);
             return true;
         }
         return false;
@@ -661,16 +670,10 @@ found:
             fix(thisLoc, k(i).prevChildBucket);
     }
 
-    /* insert a key in this bucket, splitting if necessary.
-       keypos - where to insert the key i3n range 0..n.  0=make leftmost, n=make rightmost.
-       NOTE this function may free some data, and as a result the value passed for keypos may
-       be invalid after calling insertHere()
-    */
-    void BtreeBucket::insertHere(DiskLoc thisLoc, int keypos,
+    void BtreeBucket::_insertHere(DiskLoc thisLoc, int keypos,
                                 DiskLoc recordLoc, const BSONObj& key, const Ordering& order,
                                 DiskLoc lchild, DiskLoc rchild, IndexDetails& idx)
     {
-        modified(thisLoc);
         if ( insert_debug )
             out() << "    " << thisLoc.toString() << ".insertHere " << key.toString() << '/' << recordLoc.toString() << ' '
                   << lchild.toString() << ' ' << rchild.toString() << " keypos:" << keypos << endl;
@@ -806,6 +809,20 @@ found:
             out() << "     split end " << hex << thisLoc.getOfs() << dec << endl;
     }
 
+    /* insert a key in this bucket, splitting if necessary.
+       keypos - where to insert the key i3n range 0..n.  0=make leftmost, n=make rightmost.
+       NOTE this function may free some data, and as a result the value passed for keypos may
+       be invalid after calling insertHere()
+    */
+    void BtreeBucket::insertHere(DiskLoc thisLoc, int keypos,
+                                DiskLoc recordLoc, const BSONObj& key, const Ordering& order,
+                                DiskLoc lchild, DiskLoc rchild, IndexDetails& idx)
+    {
+        modified(thisLoc);
+        BtreeBucket *b = dur::writing(this);
+        b->_insertHere(thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx);
+    }
+
     /* start a new index off, empty */
     DiskLoc BtreeBucket::addBucket(IndexDetails& id) {
         DiskLoc loc = btreeStore->insert(id.indexNamespace().c_str(), 0, BucketSize, true);
@@ -1247,7 +1264,7 @@ namespace mongo {
         while( 1 ) {
             if( loc.btree()->tempNext().isNull() ) {
                 // only 1 bucket at this level. we are done.
-                idx.head = loc;
+                dur::writingDiskLoc(idx.head) = loc;
                 break;
             }
             levels++;
diff --git a/db/btree.h b/db/btree.h
index bb31081b00d..63b1b1aa148 100644
--- a/db/btree.h
+++ b/db/btree.h
@@ -253,9 +253,14 @@ namespace mongo {
             return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key;
         }
         static BtreeBucket* allocTemp(); /* caller must release with free() */
+
+        void _insertHere(DiskLoc thisLoc, int keypos,
+                        DiskLoc recordLoc, const BSONObj& key, const Ordering &order,
+                        DiskLoc lchild, DiskLoc rchild, IndexDetails&);
         void insertHere(DiskLoc thisLoc, int keypos,
                         DiskLoc recordLoc, const BSONObj& key, const Ordering &order,
                         DiskLoc lchild, DiskLoc rchild, IndexDetails&);
+
         int _insert(DiskLoc thisLoc, DiskLoc recordLoc,
                     const BSONObj& key, const Ordering &order, bool dupsAllowed,
                     DiskLoc lChild, DiskLoc rChild, IndexDetails&);
diff --git a/db/cap.cpp b/db/cap.cpp
index e80f27eb873..c36e57c1b58 100644
--- a/db/cap.cpp
+++ b/db/cap.cpp
@@ -63,7 +63,8 @@ namespace mongo {
         DiskLoc i = cappedFirstDeletedInCurExtent();
         for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted )
             drecs.push_back( i );
-        cappedFirstDeletedInCurExtent() = i;
+
+        dur::writingDiskLoc( cappedFirstDeletedInCurExtent() ) = i;
 
         // This is the O(n^2) part.
         drecs.sort();
@@ -81,7 +82,7 @@ namespace mongo {
             DiskLoc b = *j;
             while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) {
                 // a & b are adjacent.  merge.
-                a.drec()->lengthWithHeaders += b.drec()->lengthWithHeaders;
+                dur::writingInt( a.drec()->lengthWithHeaders ) += b.drec()->lengthWithHeaders;
                 j++;
                 if ( j == drecs.end() ) {
                     DEBUGGING out() << "temp: compact adddelrec2\n";
@@ -107,8 +108,8 @@ namespace mongo {
         // migrate old NamespaceDetails format
         assert( capped );
         if ( capExtent.a() == 0 && capExtent.getOfs() == 0 ) {
-            capFirstNewRecord = DiskLoc();
-            capFirstNewRecord.setInvalid();
+            //capFirstNewRecord = DiskLoc();
+            capFirstNewRecord.writing().setInvalid();
             // put all the DeletedRecords in cappedListOfAllDeletedRecords()
             for ( int i = 1; i < Buckets; ++i ) {
                 DiskLoc first = deletedList[ i ];
@@ -116,14 +117,14 @@ namespace mongo {
                     continue;
                 DiskLoc last = first;
                 for (; !last.drec()->nextDeleted.isNull(); last = last.drec()->nextDeleted );
-                last.drec()->nextDeleted = cappedListOfAllDeletedRecords();
-                cappedListOfAllDeletedRecords() = first;
-                deletedList[ i ] = DiskLoc();
+                last.drec()->nextDeleted.writing() = cappedListOfAllDeletedRecords();
+                cappedListOfAllDeletedRecords().writing() = first;
+                deletedList[i].writing() = DiskLoc();
             }
             // NOTE cappedLastDelRecLastExtent() set to DiskLoc() in above
 
             // Last, in case we're killed before getting here
-            capExtent = firstExtent;
+            capExtent.writing() = firstExtent;
         }
     }
 
@@ -145,20 +146,20 @@ namespace mongo {
         // We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent
         // (or DiskLoc() if new capExtent == firstExtent)
         if ( capExtent == lastExtent )
-            cappedLastDelRecLastExtent() = DiskLoc();
+            dur::writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc();
         else {
             DiskLoc i = cappedFirstDeletedInCurExtent();
             for (; !i.isNull() && nextIsInCapExtent( i ); i = i.drec()->nextDeleted );
-            cappedLastDelRecLastExtent() = i;
+            dur::writingDiskLoc( cappedLastDelRecLastExtent() ) = i;
         }
 
-        capExtent = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext;
+        dur::writingDiskLoc( capExtent ) = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext;
 
         /* this isn't true if a collection has been renamed...that is ok just used for diagnostics */
         //dassert( theCapExtent()->ns == ns );
 
         theCapExtent()->assertOk();
-        capFirstNewRecord = DiskLoc();
+        dur::writingDiskLoc( capFirstNewRecord ) = DiskLoc();
     }
 
     DiskLoc NamespaceDetails::__capAlloc( int len ) {
@@ -177,10 +178,10 @@ namespace mongo {
         /* unlink ourself from the deleted list */
         if ( !ret.isNull() ) {
             if ( prev.isNull() )
-                cappedListOfAllDeletedRecords() = ret.drec()->nextDeleted;
+                cappedListOfAllDeletedRecords().writing() = ret.drec()->nextDeleted;
             else
-                prev.drec()->nextDeleted = ret.drec()->nextDeleted;
-            ret.drec()->nextDeleted.setInvalid(); // defensive.
+                prev.drec()->nextDeleted.writing() = ret.drec()->nextDeleted;
+            ret.drec()->nextDeleted.writing().setInvalid(); // defensive.
             assert( ret.drec()->extentOfs < ret.getOfs() );
         }
 
@@ -190,7 +191,7 @@ namespace mongo {
     DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) {
         // signal done allocating new extents.
         if ( !cappedLastDelRecLastExtent().isValid() )
-            cappedLastDelRecLastExtent() = DiskLoc();
+            dur::writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc();
 
         assert( len < 400000000 );
         int passes = 0;
@@ -209,7 +210,7 @@ namespace mongo {
         theCapExtent()->assertOk();
         DiskLoc firstEmptyExtent;
         while ( 1 ) {
-            if ( nrecords < max ) {
+            if ( stats.nrecords < max ) {
                 loc = __capAlloc( len );
                 if ( !loc.isNull() )
                     break;
@@ -218,8 +219,9 @@ namespace mongo {
             // If on first iteration through extents, don't delete anything.
             if ( !capFirstNewRecord.isValid() ) {
                 advanceCapExtent( ns );
+
                 if ( capExtent != firstExtent )
-                    capFirstNewRecord.setInvalid();
+                    capFirstNewRecord.writing().setInvalid();
                 // else signal done with first iteration through extents.
                 continue;
             }
@@ -248,14 +250,14 @@ namespace mongo {
             compact();
             if( ++passes > maxPasses ) {
                 log() << "passes ns:" << ns << " len:" << len << " maxPasses: " << maxPasses << '\n';
-                log() << "passes max:" << max << " nrecords:" << nrecords << " datasize: " << datasize << endl;
+                log() << "passes max:" << max << " nrecords:" << stats.nrecords << " datasize: " << stats.datasize << endl;
                 massert( 10345 ,  "passes >= maxPasses in capped collection alloc", false );
             }
         }
 
         // Remember first record allocated on this iteration through capExtent.
         if ( capFirstNewRecord.isValid() && capFirstNewRecord.isNull() )
-            capFirstNewRecord = loc;
+            dur::writingDiskLoc(capFirstNewRecord) = loc;
 
         return loc;
     }
@@ -301,7 +303,7 @@ namespace mongo {
             }
         }
 
-        uassert( 13415, "emptying the collection is not allowed", nrecords > 1 );
+        uassert( 13415, "emptying the collection is not allowed", stats.nrecords > 1 );
 
         if ( !capLooped() ) {
             theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true);
@@ -363,7 +365,7 @@ namespace mongo {
         // preserve firstExtent/lastExtent
         capExtent = firstExtent;
-        datasize = nrecords = 0;
+        stats.datasize = stats.nrecords = 0;
         // lastExtentSize preserve
         // nIndexes preserve 0
         // capped preserve true
diff --git a/db/clientcursor.h b/db/clientcursor.h
index 729b5a5b412..08e3311d1a7 100644
--- a/db/clientcursor.h
+++ b/db/clientcursor.h
@@ -292,33 +292,22 @@ namespace mongo {
         void storeOpForSlave( DiskLoc last );
         void updateSlaveLocation( CurOp& curop );
 
-        unsigned idleTime(){
-            return _idleAgeMillis;
-        }
+        unsigned idleTime() const { return _idleAgeMillis; }
 
         static void idleTimeReport(unsigned millis);
     private:
         // cursors normally timeout after an inactivy period to prevent excess memory use
         // setting this prevents timeout of the cursor in question.
-        void noTimeout() {
-            _pinValue++;
-        }
+        void noTimeout() { _pinValue++; }
 
-        multimap<DiskLoc, ClientCursor*>& byLoc() {
-            return _db->ccByLoc;
-        }
+        multimap<DiskLoc, ClientCursor*>& byLoc() { return _db->ccByLoc; }
     public:
-        void setDoingDeletes( bool doingDeletes ){
-            _doingDeletes = doingDeletes;
-        }
+        void setDoingDeletes( bool doingDeletes ) {_doingDeletes = doingDeletes; }
 
         static void appendStats( BSONObjBuilder& result );
-
         static unsigned numCursors() { return clientCursorsById.size(); }
-
         static void informAboutToDeleteBucket(const DiskLoc& b);
         static void aboutToDelete(const DiskLoc& dl);
-
         static void find( const string& ns , set<CursorId>& all );
     };
diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp
index b9486032c09..28f0ebb6705 100644
--- a/db/dbcommands.cpp
+++ b/db/dbcommands.cpp
@@ -954,7 +954,7 @@ namespace mongo {
             Client::Context ctx( ns );
 
             NamespaceDetails *d = nsdetails(ns.c_str());
-            if ( ! d || d->nrecords == 0 ){
+            if ( ! d || d->stats.nrecords == 0 ){
                 result.appendNumber( "size" , 0 );
                 result.appendNumber( "numObjects" , 0 );
                 result.append( "millis" , timer.millis() );
@@ -966,8 +966,8 @@ namespace mongo {
             shared_ptr<Cursor> c;
             if ( min.isEmpty() && max.isEmpty() ) {
                 if ( estimate ){
-                    result.appendNumber( "size" , d->datasize );
-                    result.appendNumber( "numObjects" , d->nrecords );
+                    result.appendNumber( "size" , d->stats.datasize );
+                    result.appendNumber( "numObjects" , d->stats.nrecords );
                     result.append( "millis" , timer.millis() );
                     return 1;
                 }
@@ -985,7 +985,7 @@ namespace mongo {
                 c.reset( new BtreeCursor( d, d->idxNo(*idx), *idx, min, max, false, 1 ) );
             }
 
-            long long avgObjSize = d->datasize / d->nrecords;
+            long long avgObjSize = d->stats.datasize / d->stats.nrecords;
 
             long long maxSize = jsobj["maxSize"].numberLong();
             long long maxObjects = jsobj["maxObjects"].numberLong();
@@ -1043,9 +1043,9 @@ namespace mongo {
                     log() << "error: have index [" << collNS << "] but no NamespaceDetails" << endl;
                     continue;
                 }
-                totalSize += mine->datasize;
+                totalSize += mine->stats.datasize;
                 if ( details )
-                    details->appendNumber( d.indexName() , mine->datasize / scale );
+                    details->appendNumber( d.indexName() , mine->stats.datasize / scale );
             }
             return totalSize;
         }
@@ -1085,10 +1085,10 @@ namespace mongo {
                 return false;
             }
 
-            long long size = nsd->datasize / scale;
-            result.appendNumber( "count" , nsd->nrecords );
+            long long size = nsd->stats.datasize / scale;
+            result.appendNumber( "count" , nsd->stats.nrecords );
             result.appendNumber( "size" , size );
-            result.append      ( "avgObjSize" , double(size) / double(nsd->nrecords) );
+            result.append      ( "avgObjSize" , double(size) / double(nsd->stats.nrecords) );
             int numExtents;
             result.appendNumber( "storageSize" , nsd->storageSize( &numExtents ) / scale );
             result.append( "numExtents" , numExtents );
@@ -1143,8 +1143,8 @@ namespace mongo {
                 }
 
                 ncollections += 1;
-                objects += nsd->nrecords;
-                size += nsd->datasize;
+                objects += nsd->stats.nrecords;
+                size += nsd->stats.datasize;
 
                 int temp;
                 storageSize += nsd->storageSize( &temp );
@@ -1191,7 +1191,7 @@ namespace mongo {
             string toNs = dbname + "." + to;
             NamespaceDetails *nsd = nsdetails( fromNs.c_str() );
             massert( 10301 ,  "source collection " + fromNs + " does not exist", nsd );
-            long long excessSize = nsd->datasize - size * 2; // datasize and extentSize can't be compared exactly, so add some padding to 'size'
+            long long excessSize = nsd->stats.datasize - size * 2; // datasize and extentSize can't be compared exactly, so add some padding to 'size'
             DiskLoc extent = nsd->firstExtent;
             for( ; excessSize > extent.ext()->length && extent != nsd->lastExtent; extent = extent.ext()->xnext ) {
                 excessSize -= extent.ext()->length;
@@ -1485,6 +1485,8 @@ namespace mongo {
 
                 if (!cc->yieldSometimes())
                     break;
+
+                RARELY killCurrentOp.checkForInterrupt();
             }
 
             BSONArrayBuilder b( result.subarrayStart( "values" ) );
diff --git a/db/dbcommands_admin.cpp b/db/dbcommands_admin.cpp
index b7ad9602c5f..a37bb4f6ce1 100644
--- a/db/dbcommands_admin.cpp
+++ b/db/dbcommands_admin.cpp
@@ -128,7 +128,7 @@ namespace mongo {
                     ss << " extent asserted ";
                 }
 
-                ss << " datasize?:" << d->datasize << " nrecords?:" << d->nrecords << " lastExtentSize:" << d->lastExtentSize << '\n';
+                ss << " datasize?:" << d->stats.datasize << " nrecords?:" << d->stats.nrecords << " lastExtentSize:" << d->lastExtentSize << '\n';
                 ss << " padding:" << d->paddingFactor << '\n';
 
                 try {
@@ -175,7 +175,7 @@ namespace mongo {
                         else ss << " (OK)";
                         ss << '\n';
                     }
-                    ss << "  " << n << " objects found, nobj:" << d->nrecords << '\n';
+                    ss << "  " << n << " objects found, nobj:" << d->stats.nrecords << '\n';
                     ss << "  " << len << " bytes data w/headers\n";
                     ss << "  " << nlen << " bytes data wout/headers\n";
                 }
diff --git a/db/diskloc.h b/db/diskloc.h
index 1f06fc58778..0f675c25d90 100644
--- a/db/diskloc.h
+++ b/db/diskloc.h
@@ -127,6 +127,8 @@ namespace mongo {
             return compare(b) < 0;
         }
 
+        DiskLoc& writing(); // see dur.h
+
         /* Get the "thing" associated with this disk location.
            it is assumed the object is what you say it is -- you must assure that
            (think of this as an unchecked type cast)
diff --git a/db/dur.cpp b/db/dur.cpp
new file mode 100644
index 00000000000..3d63b5003f4
--- /dev/null
+++ b/db/dur.cpp
@@ -0,0 +1,32 @@
+// @file dur.cpp
+
+#include "pch.h"
+#include "dur.h"
+#include "../util/mmap.h"
+
+namespace mongo {
+
+    namespace dur {
+
+#if defined(_DEBUG) && defined(_DURABLE)
+
+        void* writingPtr(void *x, size_t len) {
+            cout << "TEMP writing " << x << ' ' << len << endl;
+            return MemoryMappedFile::getWriteViewFor(x);
+        }
+
+        void assertReading(void *p) {
+            assert( MemoryMappedFile::getWriteViewFor(p) !=
+                    p );
+        }
+        void assertWriting(void *p) {
+            // todo:
+            //assert( MemoryMappedFile::getWriteViewFor(p) ==
+            //        p );
+        }
+
+#endif
+
+    }
+
+}
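
The _DEBUG/_DURABLE build above maps the data files read-only and routes declared writes
through a second, writable view, so a forgotten writing...() call faults immediately. A
minimal POSIX sketch of that dual-view trick (assumed file name, not part of this commit;
mmap_win.cpp below implements the Windows equivalent with MapViewOfFile):

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/mman.h>
    #include <cassert>
    #include <cstddef>

    int main() {
        // both views are backed by the same file, like the two MapViewOfFile
        // calls made against one file mapping in mmap_win.cpp
        int fd = open("datafile.tmp", O_RDWR | O_CREAT | O_TRUNC, 0600);
        assert( fd >= 0 );
        const size_t len = 4096;
        int rc = ftruncate(fd, (off_t) len);
        assert( rc == 0 );

        char *ro = (char*) mmap(0, len, PROT_READ, MAP_SHARED, fd, 0);              // normal access
        char *w  = (char*) mmap(0, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); // declared writes
        assert( ro != (char*) MAP_FAILED && w != (char*) MAP_FAILED );

        w[10] = 'x';              // the moral equivalent of writing through writingPtr()
        assert( ro[10] == 'x' );  // both views see the same page
        // ro[10] = 'y';          // would SIGSEGV: an undeclared write, which is the point

        munmap(ro, len); munmap(w, len); close(fd); unlink("datafile.tmp");
        return 0;
    }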
diff --git a/db/dur.h b/db/dur.h
new file mode 100644
index 00000000000..c139bedb59a
--- /dev/null
+++ b/db/dur.h
@@ -0,0 +1,68 @@
+// @file dur.h durability support
+
+#pragma once
+
+#include "diskloc.h"
+
+namespace mongo {
+
+    namespace dur {
+
+        /** call writing...() to declare "i'm about to write to x and it should be logged for redo."
+
+            failure to call writing...() is checked in _DEBUG mode by using a read only mapped view
+            (i.e., you'll segfault if you don't...)
+        */
+
+
+#if !defined(_DURABLE)
+
+        inline void* writingPtr(void *x, size_t len) { return x; }
+        inline DiskLoc& writingDiskLoc(DiskLoc& d) { return d; }
+        inline int& writingInt(int& d) { return d; }
+        template <typename T> inline T* writing(T *x) { return x; }
+        inline void assertReading(void *p) { }
+        inline void assertWriting(void *p) { }
+
+#else
+
+        void* writingPtr(void *x, size_t len);
+
+        inline DiskLoc& writingDiskLoc(DiskLoc& d) {
+#if defined(_DEBUG)
+            return *((DiskLoc*) writingPtr(&d, sizeof(d)));
+#else
+            return d;
+#endif
+        }
+
+        inline int& writingInt(int& d) {
+#if defined(_DEBUG)
+            return *((int*) writingPtr(&d, sizeof(d)));
+#else
+            return d;
+#endif
+        }
+
+        template <typename T>
+        inline
+        T* writing(T *x) {
+#if defined(_DEBUG)
+            return (T*) writingPtr(x, sizeof(T));
+#else
+            return x;
+#endif
+        }
+
+        void assertReading(void *p);
+        void assertWriting(void *p);
+
+#endif
+
+    }
+
+    inline DiskLoc& DiskLoc::writing() {
+        return dur::writingDiskLoc(*this);
+    }
+
+}
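
What a caller of the header above looks like under the new contract: declare the write,
then write through the pointer or reference you get back. The helper function and its
arguments here are invented for illustration; only the dur:: calls and DiskLoc::writing()
come from this commit:

    #include "db/dur.h"

    namespace mongo {
        // hypothetical example, not code from this commit
        void fixHead(DiskLoc& head, const DiskLoc& newHead, int& counter) {
            // under _DURABLE the default view is read-only, so a plain
            // "head = newHead;" would fault; declare the write first
            dur::writingDiskLoc(head) = newHead;   // or: head.writing() = newHead;
            dur::writingInt(counter)++;            // same contract for a plain int
        }
    }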
diff --git a/db/namespace.cpp b/db/namespace.cpp
index 682300a7dd8..0e183680fea 100644
--- a/db/namespace.cpp
+++ b/db/namespace.cpp
@@ -47,7 +47,7 @@ namespace mongo {
     NamespaceDetails::NamespaceDetails( const DiskLoc &loc, bool _capped ) {
         /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */
         firstExtent = lastExtent = capExtent = loc;
-        datasize = nrecords = 0;
+        stats.datasize = stats.nrecords = 0;
         lastExtentSize = 0;
         nIndexes = 0;
         capped = _capped;
@@ -181,41 +181,43 @@ namespace mongo {
     }
 
     void NamespaceDetails::addDeletedRec(DeletedRecord *d, DiskLoc dloc) {
+        dur::assertReading(this);
         BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) );
+
+        dassert( dloc.drec() == d );
+        //DeletedRecord *dold = d;
+        d = dur::writing(d);
         {
             // defensive code: try to make us notice if we reference a deleted record
             (unsigned&) (((Record *) d)->data) = 0xeeeeeeee;
         }
-        dassert( dloc.drec() == d );
-        DEBUGGING out() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl;
+        DEBUGGING log() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl;
         if ( capped ) {
             if ( !cappedLastDelRecLastExtent().isValid() ) {
                 // Initial extent allocation.  Insert at end.
                 d->nextDeleted = DiskLoc();
                 if ( cappedListOfAllDeletedRecords().isNull() )
-                    cappedListOfAllDeletedRecords() = dloc;
+                    dur::writingDiskLoc( cappedListOfAllDeletedRecords() ) = dloc;
                 else {
                     DiskLoc i = cappedListOfAllDeletedRecords();
-                    for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted );
-                    i.drec()->nextDeleted = dloc;
+                    for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted )
+                        ;
+                    i.drec()->nextDeleted.writing() = dloc;
                 }
             }
             else {
                 d->nextDeleted = cappedFirstDeletedInCurExtent();
-                cappedFirstDeletedInCurExtent() = dloc;
+                dur::writingDiskLoc( cappedFirstDeletedInCurExtent() ) = dloc;
                 // always compact() after this so order doesn't matter
             }
         }
         else {
             int b = bucket(d->lengthWithHeaders);
             DiskLoc& list = deletedList[b];
             DiskLoc oldHead = list;
-            list = dloc;
+            dur::writingDiskLoc(list) = dloc;
             d->nextDeleted = oldHead;
         }
     }
 
-    /*
-       lenToAlloc is WITH header
-    */
+    // lenToAlloc is WITH header
     DiskLoc NamespaceDetails::alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc) {
         lenToAlloc = (lenToAlloc + 3) & 0xfffffffc;
         DiskLoc loc = _alloc(ns, lenToAlloc);
@@ -223,6 +225,7 @@ namespace mongo {
             return loc;
 
         DeletedRecord *r = loc.drec();
+        r = dur::writing(r);
 
         /* note we want to grab from the front so our next pointers on disk tend
            to go in a forward direction which is important for performance. */
@@ -247,9 +250,10 @@ namespace mongo {
         DiskLoc newDelLoc = loc;
         newDelLoc.inc(lenToAlloc);
         DeletedRecord *newDel = DataFileMgr::makeDeletedRecord(newDelLoc, left);
-        newDel->extentOfs = r->extentOfs;
-        newDel->lengthWithHeaders = left;
-        newDel->nextDeleted.Null();
+        DeletedRecord *newDelW = dur::writing(newDel);
+        newDelW->extentOfs = r->extentOfs;
+        newDelW->lengthWithHeaders = left;
+        newDelW->nextDeleted.Null();
 
         addDeletedRec(newDel, newDelLoc);
 
@@ -323,8 +327,8 @@ namespace mongo {
 
         /* unlink ourself from the deleted list */
         {
-            DeletedRecord *bmr = bestmatch.drec();
-            *bestprev = bmr->nextDeleted;
+            DeletedRecord *bmr = dur::writing(bestmatch.drec());
+            *dur::writing(bestprev) = bmr->nextDeleted;
             bmr->nextDeleted.setInvalid(); // defensive.
             assert(bmr->extentOfs < bestmatch.getOfs());
         }
@@ -394,6 +398,21 @@ namespace mongo {
         return cappedAlloc(ns,len);
     }
 
+    void NamespaceIndex::kill_ns(const char *ns) {
+        if ( !ht )
+            return;
+        Namespace n(ns);
+        ht->kill(n);
+
+        for( int i = 0; i<=1; i++ ) {
+            try {
+                Namespace extra(n.extraName(i).c_str());
+                ht->kill(extra);
+            }
+            catch(DBException&) { }
+        }
+    }
+
     /* extra space for indexes when more than 10 */
     NamespaceDetails::Extra* NamespaceIndex::newExtra(const char *ns, int i, NamespaceDetails *d) {
         assert( i >= 0 && i <= 1 );
@@ -440,7 +459,7 @@ namespace mongo {
             id = &idx(nIndexes,false);
         }
 
-        nIndexes++;
+        (*dur::writing(&nIndexes))++;
         if ( resetTransient )
             NamespaceDetailsTransient::get_w(thisns).addedIndex();
         return *id;
diff --git a/db/namespace.h b/db/namespace.h
index 4f6cde9ac8e..7479a21da24 100644
--- a/db/namespace.h
+++ b/db/namespace.h
@@ -125,8 +125,10 @@ namespace mongo {
         */
         DiskLoc deletedList[Buckets]; // ofs 168 (8 byte aligned)
-        long long datasize;
-        long long nrecords;
+        struct Stats {
+            long long datasize; //datasize and nrecords MUST Be adjacent code assumes!
+            long long nrecords;
+        } stats;
         int lastExtentSize;
         int nIndexes;
     private:
@@ -503,20 +505,7 @@ namespace mongo {
             return d;
         }
 
-        void kill_ns(const char *ns) {
-            if ( !ht )
-                return;
-            Namespace n(ns);
-            ht->kill(n);
-
-            for( int i = 0; i<=1; i++ ) {
-                try {
-                    Namespace extra(n.extraName(i).c_str());
-                    ht->kill(extra);
-                }
-                catch(DBException&) { }
-            }
-        }
+        void kill_ns(const char *ns);
 
         bool find(const char *ns, DiskLoc& loc) {
             NamespaceDetails *l = details(ns);
diff --git a/db/pdfile.cpp b/db/pdfile.cpp
index cedcc2aafc6..8e75ce41925 100644
--- a/db/pdfile.cpp
+++ b/db/pdfile.cpp
@@ -396,24 +396,25 @@ namespace mongo {
     }
 
     void addNewExtentToNamespace(const char *ns, Extent *e, DiskLoc eloc, DiskLoc emptyLoc, bool capped) {
-        DiskLoc oldExtentLoc;
         NamespaceIndex *ni = nsindex(ns);
         NamespaceDetails *details = ni->details(ns);
         if ( details ) {
             assert( !details->lastExtent.isNull() );
             assert( !details->firstExtent.isNull() );
-            e->xprev = details->lastExtent;
-            details->lastExtent.ext()->xnext = eloc;
+            dur::writingDiskLoc(e->xprev) = details->lastExtent;
+            dur::writingDiskLoc(details->lastExtent.ext()->xnext) = eloc;
             assert( !eloc.isNull() );
-            details->lastExtent = eloc;
+            dur::writingDiskLoc(details->lastExtent) = eloc;
         }
         else {
             ni->add_ns(ns, eloc, capped);
             details = ni->details(ns);
         }
 
-        details->lastExtentSize = e->length;
-        DEBUGGING out() << "temp: newextent adddelrec " << ns << endl;
+        {
+            NamespaceDetails *dw = dur::writing(details);
+            dw->lastExtentSize = e->length;
+        }
         details->addDeletedRec(emptyLoc.drec(), emptyLoc);
     }
@@ -434,11 +435,13 @@ namespace mongo {
             return cc().database()->addAFile( 0, true )->createExtent(ns, approxSize, newCapped, loops+1);
         }
         int offset = header->unused.getOfs();
-        header->unused.set( fileNo, offset + ExtentSize );
-        header->unusedLength -= ExtentSize;
+
+        DataFileHeader *h = dur::writing(header);
+        h->unused.set( fileNo, offset + ExtentSize );
+        h->unusedLength -= ExtentSize;
         loc.set(fileNo, offset);
         Extent *e = _getExtent(loc);
-        DiskLoc emptyLoc = e->init(ns, ExtentSize, fileNo, offset);
+        DiskLoc emptyLoc = dur::writing(e)->init(ns, ExtentSize, fileNo, offset);
 
         addNewExtentToNamespace(ns, e, loc, emptyLoc, newCapped);
@@ -553,9 +556,7 @@ namespace mongo {
         emptyLoc.inc( (int) (_extentData-(char*)this) );
 
         int l = _length - (_extentData - (char *) this);
-        //DeletedRecord *empty1 = (DeletedRecord *) extentData;
-        DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, l);
-        //assert( empty == empty1 );
+        DeletedRecord *empty = dur::writing( DataFileMgr::makeDeletedRecord(emptyLoc, l) );
         empty->lengthWithHeaders = l;
         empty->extentOfs = myLoc.getOfs();
         return emptyLoc;
@@ -750,12 +751,11 @@ namespace mongo {
         else {
             DiskLoc a = freeExtents->firstExtent;
             assert( a.ext()->xprev.isNull() );
-            a.ext()->xprev = d->lastExtent;
-            d->lastExtent.ext()->xnext = a;
-            freeExtents->firstExtent = d->firstExtent;
-
-            d->firstExtent.setInvalid();
-            d->lastExtent.setInvalid();
+            dur::writingDiskLoc( a.ext()->xprev ) = d->lastExtent;
+            dur::writingDiskLoc( d->lastExtent.ext()->xnext ) = a;
+            dur::writingDiskLoc( freeExtents->firstExtent ) = d->firstExtent;
+            dur::writingDiskLoc( d->firstExtent ).setInvalid();
+            dur::writingDiskLoc( d->lastExtent ).setInvalid();
         }
     }
@@ -843,14 +843,14 @@ namespace mongo {
         /* remove ourself from the record next/prev chain */
         {
             if ( todelete->prevOfs != DiskLoc::NullOfs )
-                todelete->getPrev(dl).rec()->nextOfs = todelete->nextOfs;
+                dur::writingInt( todelete->getPrev(dl).rec()->nextOfs ) = todelete->nextOfs;
             if ( todelete->nextOfs != DiskLoc::NullOfs )
-                todelete->getNext(dl).rec()->prevOfs = todelete->prevOfs;
+                dur::writingInt( todelete->getNext(dl).rec()->prevOfs ) = todelete->prevOfs;
         }
 
         /* remove ourself from extent pointers */
         {
-            Extent *e = todelete->myExtent(dl);
+            Extent *e = dur::writing( todelete->myExtent(dl) );
             if ( e->firstRecord == dl ) {
                 if ( todelete->nextOfs == DiskLoc::NullOfs )
                     e->firstRecord.Null();
@@ -867,18 +867,26 @@ namespace mongo {
 
         /* add to the free list */
         {
-            d->nrecords--;
-            d->datasize -= todelete->netLength();
-            /* temp: if in system.indexes, don't reuse, and zero out: we want to be
-               careful until validated more, as IndexDetails has pointers
-               to this disk location.  so an incorrectly done remove would cause
-               a lot of problems.
-            */
+            {
+                NamespaceDetails::Stats *s = dur::writing(&d->stats);
+                s->datasize -= todelete->netLength();
+                s->nrecords--;
+            }
+
             if ( strstr(ns, ".system.indexes") ) {
-                memset(todelete, 0, todelete->lengthWithHeaders);
+                /* temp: if in system.indexes, don't reuse, and zero out: we want to be
+                   careful until validated more, as IndexDetails has pointers
+                   to this disk location.  so an incorrectly done remove would cause
+                   a lot of problems.
+                */
+                memset(dur::writingPtr(todelete, todelete->lengthWithHeaders), 0, todelete->lengthWithHeaders);
             }
             else {
-                DEV memset(todelete->data, 0, todelete->netLength()); // attempt to notice invalid reuse.
+                DEV {
+                    unsigned long long *p = (unsigned long long *) todelete->data;
+                    *dur::writing(p) = 0;
+                    //DEV memset(todelete->data, 0, todelete->netLength()); // attempt to notice invalid reuse.
+                }
                 d->addDeletedRec((DeletedRecord*)todelete, dl);
             }
         }
@@ -1082,7 +1090,7 @@ namespace mongo {
         bool dropDups = idx.dropDups() || inDBRepair;
         BSONObj order = idx.keyPattern();
 
-        idx.head.Null();
+        dur::writingDiskLoc(idx.head).Null();
 
         if ( logLevel > 1 ) printMemInfo( "before index start" );
@@ -1090,9 +1098,9 @@ namespace mongo {
         unsigned long long n = 0;
         shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
         BSONObjExternalSorter sorter(order);
-        sorter.hintNumObjects( d->nrecords );
+        sorter.hintNumObjects( d->stats.nrecords );
         unsigned long long nkeys = 0;
-        ProgressMeterHolder pm( op->setMessage( "index: (1/3) external sort" , d->nrecords , 10 ) );
+        ProgressMeterHolder pm( op->setMessage( "index: (1/3) external sort" , d->stats.nrecords , 10 ) );
         while ( c->ok() ) {
             BSONObj o = c->current();
             DiskLoc loc = c->currLoc();
@@ -1180,7 +1188,7 @@ namespace mongo {
         bool dupsAllowed = !idx.unique();
         bool dropDups = idx.dropDups();
 
-        ProgressMeter& progress = cc().curop()->setMessage( "bg index build" , d->nrecords );
+        ProgressMeter& progress = cc().curop()->setMessage( "bg index build" , d->stats.nrecords );
 
         unsigned long long n = 0;
         auto_ptr<ClientCursor> cc;
@@ -1333,7 +1341,7 @@ namespace mongo {
         if ( d == 0 || (d->flags & NamespaceDetails::Flag_HaveIdIndex) )
             return;
 
-        d->flags |= NamespaceDetails::Flag_HaveIdIndex;
+        *dur::writing(&d->flags) |= NamespaceDetails::Flag_HaveIdIndex;
 
         {
             NamespaceDetails::IndexIterator i = d->ii();
@@ -1532,6 +1540,7 @@ namespace mongo {
 
         Record *r = loc.rec();
         assert( r->lengthWithHeaders >= lenWHdr );
+        r = (Record*) dur::writingPtr(r, lenWHdr);
         if( addID ) {
             /* a little effort was made here to avoid a double copy when we add an ID */
             ((int&)*r->data) = *((int*) obuf) + newId->size();
@@ -1542,22 +1551,25 @@ namespace mongo {
             if( obuf )
                 memcpy(r->data, obuf, len);
         }
-        Extent *e = r->myExtent(loc);
+        Extent *e = dur::writing(r->myExtent(loc));
         if ( e->lastRecord.isNull() ) {
             e->firstRecord = e->lastRecord = loc;
             r->prevOfs = r->nextOfs = DiskLoc::NullOfs;
         }
         else {
             Record *oldlast = e->lastRecord.rec();
             r->prevOfs = e->lastRecord.getOfs();
             r->nextOfs = DiskLoc::NullOfs;
-            oldlast->nextOfs = loc.getOfs();
+            dur::writing(oldlast)->nextOfs = loc.getOfs();
             e->lastRecord = loc;
         }
 
-        d->nrecords++;
-        d->datasize += r->netLength();
+        /* durability todo : this could be a bit annoying / slow to record constantly */
+        {
+            NamespaceDetails::Stats *s = dur::writing(&d->stats);
+            s->datasize += r->netLength();
+            s->nrecords++;
+        }
 
         // we don't bother clearing those stats for the god tables - also god is true when adidng a btree bucket
         if ( !god )
@@ -1578,7 +1590,7 @@ namespace mongo {
             int idxNo = tableToIndex->nIndexes;
             IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str(), !background); // clear transient info caches so they refresh; increments nIndexes
-            idx.info = loc;
+            dur::writingDiskLoc(idx.info) = loc;
             try {
                 buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo, background);
             } catch( DBException& e ) {
@@ -1669,8 +1681,12 @@ namespace mongo {
             e->lastRecord = loc;
         }
 
-        d->nrecords++;
-        d->datasize += r->netLength();
+        /* todo: don't update for oplog? seems wasteful. */
+        {
+            NamespaceDetails::Stats *s = dur::writing(&d->stats);
+            s->datasize += r->netLength();
+            s->nrecords++;
+        }
 
         return r;
     }
diff --git a/db/pdfile.h b/db/pdfile.h
index edfc4422b39..1d4001658b8 100644
--- a/db/pdfile.h
+++ b/db/pdfile.h
@@ -69,9 +69,7 @@ namespace mongo {
         */
         Extent* createExtent(const char *ns, int approxSize, bool capped = false, int loops = 0);
 
-        DataFileHeader *getHeader() {
-            return header;
-        }
+        DataFileHeader *getHeader() { return header; }
 
         /* return max size an extent may be */
         static int maxSize();
@@ -292,10 +290,7 @@ namespace mongo {
             return ( version == VERSION ) && ( versionMinor == VERSION_MINOR );
         }
 
-        bool uninitialized() const {
-            if ( version == 0 ) return true;
-            return false;
-        }
+        bool uninitialized() const { return version == 0; }
 
         /*Record* __getRecord(DiskLoc dl) {
             int ofs = dl.getOfs();
@@ -307,12 +302,13 @@ namespace mongo {
             if ( uninitialized() ) {
                 assert(filelength > 32768 );
                 assert( HeaderSize == 8192 );
-                fileLength = filelength;
-                version = VERSION;
-                versionMinor = VERSION_MINOR;
-                unused.set( fileno, HeaderSize );
+                DataFileHeader *h = dur::writing(this);
+                h->fileLength = filelength;
+                h->version = VERSION;
+                h->versionMinor = VERSION_MINOR;
+                h->unused.set( fileno, HeaderSize );
                 assert( (data-(char*)this) == HeaderSize );
-                unusedLength = fileLength - HeaderSize - 16;
+                h->unusedLength = fileLength - HeaderSize - 16;
                 //memcpy(data+unusedLength, " \nthe end\n", 16);
             }
         }
diff --git a/db/query.cpp b/db/query.cpp
index 8b5d24ba1f9..8f63bf44d7b 100644
--- a/db/query.cpp
+++ b/db/query.cpp
@@ -525,7 +525,7 @@ namespace mongo {
 
         // count of all objects
         if ( query.isEmpty() ){
-            return applySkipLimit( d->nrecords , cmd );
+            return applySkipLimit( d->stats.nrecords , cmd );
         }
         MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true );
         CountOp original( ns , cmd );
diff --git a/db/rec.h b/db/rec.h
--- a/db/rec.h
+++ b/db/rec.h
@@ -51,7 +51,7 @@ public:
         theDataFileMgr._deleteRecord(nsdetails_notinline(ns), ns, d.rec(), d);
     }
 
-    VIRT void modified(DiskLoc d) { }
+//goingaway    VIRT void modified(DiskLoc d) { }
 
     VIRT void drop(const char *ns) {
         dropNS(ns);
@@ -127,11 +127,4 @@ inline BtreeBucket* DiskLoc::btree() const {
     return (BtreeBucket*) btreeStore->get(*this, BucketSize);
 }
 
-inline BtreeBucket* DiskLoc::btreemod() const {
-    assert( _a != -1 );
-    BtreeBucket *b = (BtreeBucket*) btreeStore->get(*this, BucketSize);
-    btreeStore->modified(*this);
-    return b;
-}
-
 }
diff --git a/db/repl/manager.cpp b/db/repl/manager.cpp
index 328f6d279f9..c1a7c858d18 100644
--- a/db/repl/manager.cpp
+++ b/db/repl/manager.cpp
@@ -55,9 +55,12 @@ namespace mongo {
     }
 
     Manager::~Manager() {
-        log() << "ERROR: ~Manager should never be called" << rsLog;
+        /* we don't destroy the replset object we sit in; however, the destructor could have thrown on init.
+           the log message below is just a reminder to come back one day and review this code more, and to
+           make it cleaner.
+        */
+        log() << "info: ~Manager called" << rsLog;
         rs->mgr = 0;
-        //assert(false);
     }
 
     void Manager::starting() {
diff --git a/db/repl/rs.h b/db/repl/rs.h
index 164d179d7a3..19f8e5e0ff3 100644
--- a/db/repl/rs.h
+++ b/db/repl/rs.h
@@ -75,7 +75,7 @@ namespace mongo {
         virtual void starting();
     public:
         Manager(ReplSetImpl *rs);
-        ~Manager();
+        virtual ~Manager();
         void msgReceivedNewConfig(BSONObj);
         void msgCheckNewState();
     };
diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp
index 3851c66827d..a0579ac967e 100644
--- a/db/repl/rs_initialsync.cpp
+++ b/db/repl/rs_initialsync.cpp
@@ -74,7 +74,7 @@ namespace mongo {
         NamespaceDetails *d = nsdetails(rsoplog);
 
         // temp
-        if( d && d->nrecords == 0 )
+        if( d && d->stats.nrecords == 0 )
             return; // already empty, ok.
 
         log(1) << "replSet empty oplog" << rsLog;
diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp
index aeb9b8b25e7..0fcaaece31f 100644
--- a/db/repl/rs_rollback.cpp
+++ b/db/repl/rs_rollback.cpp
@@ -524,7 +524,7 @@ namespace mongo {
                 }
             }
             // did we just empty the collection?  if so let's check if it even exists on the source.
-            if( nsd->nrecords == 0 ) {
+            if( nsd->stats.nrecords == 0 ) {
                 try {
                     string sys = cc().database()->name + ".system.namespaces";
                     bo o = them->findOne(sys, QUERY("name"<<d.ns));
diff --git a/dbtests/mmaptests.cpp b/dbtests/mmaptests.cpp
index dd60b2f9d53..f272b63dae4 100755
--- a/dbtests/mmaptests.cpp
+++ b/dbtests/mmaptests.cpp
@@ -1,5 +1,4 @@
-// socktests.cpp : sock.{h,cpp} unit tests.
-//
+// @file mmaptests.cpp
 
 /**
  *    Copyright (C) 2008 10gen Inc.
diff --git a/dbtests/namespacetests.cpp b/dbtests/namespacetests.cpp
index ca051fe15f9..5588bf7c2cd 100644
--- a/dbtests/namespacetests.cpp
+++ b/dbtests/namespacetests.cpp
@@ -604,7 +604,7 @@ namespace NamespaceTests {
                     ++count;
                 }
             }
-            ASSERT_EQUALS( count, nsd()->nrecords );
+            ASSERT_EQUALS( count, nsd()->stats.nrecords );
             return count;
         }
         int nExtents() const {
@@ -620,7 +620,7 @@ namespace NamespaceTests {
             return ns_;
         }
         NamespaceDetails *nsd() const {
-            return nsdetails( ns() );
+            return dur::writing( nsdetails( ns() ) );
         }
         static BSONObj bigObj() {
             string as( 187, 'a' );
@@ -737,9 +737,9 @@ namespace NamespaceTests {
                 }
 
                 DiskLoc d = l[6];
-                long long n = nsd->nrecords;
+                long long n = nsd->stats.nrecords;
                 nsd->cappedTruncateAfter(ns(), d, false);
-                ASSERT_EQUALS( nsd->nrecords , n-1 );
+                ASSERT_EQUALS( nsd->stats.nrecords , n-1 );
 
                 {
                     ForwardCappedCursor c(nsd);
@@ -770,7 +770,7 @@ namespace NamespaceTests {
             void run() {
                 create();
                 nsd()->deletedList[ 2 ] = nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted;
-                nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted = DiskLoc();
+                nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted.writing() = DiskLoc();
                 nsd()->cappedLastDelRecLastExtent().Null();
                 NamespaceDetails *d = nsd();
                 zero( &d->capExtent );
diff --git a/dbtests/test.vcxproj b/dbtests/test.vcxproj
index 8e0173c11d4..8b7e5b95fed 100644
--- a/dbtests/test.vcxproj
+++ b/dbtests/test.vcxproj
@@ -97,7 +97,7 @@
     <ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>..\..\js\src;..\pcre-7.4;C:\boost;\boost;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>_NOTDURABLE;_UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>_DURABLE;_UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>No</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -263,6 +263,7 @@
     <ClCompile Include="..\client\model.cpp" />
<ClCompile Include="..\client\parallel.cpp" />
<ClCompile Include="..\db\cap.cpp" />
+ <ClCompile Include="..\db\dur.cpp" />
<ClCompile Include="..\db\geo\2d.cpp" />
<ClCompile Include="..\db\geo\haystack.cpp" />
<ClCompile Include="..\db\repl\consensus.cpp" />
diff --git a/dbtests/test.vcxproj.filters b/dbtests/test.vcxproj.filters
index 3302cbf79e6..16b8368ab7c 100755
--- a/dbtests/test.vcxproj.filters
+++ b/dbtests/test.vcxproj.filters
@@ -704,6 +704,9 @@
     <ClCompile Include="..\scripting\bench.cpp">
<Filter>scripting</Filter>
</ClCompile>
+ <ClCompile Include="..\db\dur.cpp">
+ <Filter>db\cpp</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\SConstruct">
diff --git a/jstests/evald.js b/jstests/evald.js
index 1131a21f414..88566a19585 100644
--- a/jstests/evald.js
+++ b/jstests/evald.js
@@ -2,7 +2,7 @@ t = db.jstests_evald;
 t.drop();
 
 function debug( x ) {
-//    printjson( x );
+    printjson( x );
 }
 
 for( i = 0; i < 10; ++i ) {
diff --git a/jstests/geo_update2.js b/jstests/geo_update2.js
new file mode 100644
index 00000000000..0a6bb2f0b00
--- /dev/null
+++ b/jstests/geo_update2.js
@@ -0,0 +1,40 @@
+
+t = db.geo_update1
+t.drop()
+
+for(var x = 0; x < 10; x++ ) {
+    for(var y = 0; y < 10; y++ ) {
+        t.insert({"loc": [x, y] , x : x , y : y });
+    }
+}
+
+t.ensureIndex( { loc : "2d" } )
+
+function p(){
+    print( "--------------" );
+    for ( var y=0; y<10; y++ ){
+        var c = t.find( { y : y } ).sort( { x : 1 } )
+        var s = "";
+        while ( c.hasNext() )
+            s += c.next().z + " ";
+        print( s )
+    }
+    print( "--------------" );
+}
+
+p()
+
+/* SERVER-1821
+t.update({"loc" : {"$within" : {"$center" : [[5,5], 2]}}}, {'$inc' : { 'z' : 1}}, false, true);
+assert.isnull( db.getLastError() , "B1" )
+p()
+
+t.update({}, {'$inc' : { 'z' : 1}}, false, true);
+assert.isnull( db.getLastError() , "B2" )
+p()
+
+
+t.update({"loc" : {"$within" : {"$center" : [[5,5], 2]}}}, {'$inc' : { 'z' : 1}}, false, true);
+assert.isnull( db.getLastError() , "B3" )
+p()
+*/
diff --git a/s/d_split.cpp b/s/d_split.cpp
index 208cf665899..5c569da391b 100644
--- a/s/d_split.cpp
+++ b/s/d_split.cpp
@@ -196,8 +196,8 @@ namespace mongo {
                 return false;
             }
 
-            const long long recCount = d->nrecords;
-            const long long dataSize = d->datasize;
+            const long long recCount = d->stats.nrecords;
+            const long long dataSize = d->stats.datasize;
 
             // If there's not enough data for more than one chunk, no point continuing.
             if ( dataSize < maxChunkSize || recCount == 0 ) {
diff --git a/util/hashtab.h b/util/hashtab.h
index 16c5483eda6..6604864e65a 100644
--- a/util/hashtab.h
+++ b/util/hashtab.h
@@ -24,6 +24,7 @@
 
 #include "../pch.h"
 #include <map>
+#include "../db/dur.h"
 
 namespace mongo {
 
@@ -127,35 +128,28 @@ namespace mongo {
             bool found;
             int i = _find(k, found);
             if ( i >= 0 && found ) {
-                Node& n = nodes(i);
-                n.k.kill();
-                n.setUnused();
+                Node* n = &nodes(i);
+                n = dur::writing(n);
+                n->k.kill();
+                n->setUnused();
             }
         }
-/*
-        void drop(const Key& k) {
-            bool found;
-            int i = _find(k, found);
-            if ( i >= 0 && found ) {
-                nodes[i].setUnused();
-            }
-        }
-*/
+
+        /** returns false if too full */
         bool put(const Key& k, const Type& value) {
             bool found;
             int i = _find(k, found);
             if ( i < 0 )
                 return false;
-            Node& n = nodes(i);
+            Node* n = dur::writing( &nodes(i) );
             if ( !found ) {
-                n.k = k;
-                n.hash = k.hash();
+                n->k = k;
+                n->hash = k.hash();
            }
            else {
-                assert( n.hash == k.hash() );
+                assert( n->hash == k.hash() );
            }
-            n.value = value;
+            n->value = value;
            return true;
        }
diff --git a/util/message.h b/util/message.h
index 9651141ad6c..84c4e24ee45 100644
--- a/util/message.h
+++ b/util/message.h
@@ -225,11 +225,15 @@ struct OP_GETMORE : public MSGHEADER {
         int len; /* len of the msg, including this field */
         MSGID id; /* request/reply id's match... */
         MSGID responseTo; /* id of the message we are responding to */
-        int _operation;
+        short _operation;
+        char _flags;
+        char _version;
         int operation() const {
             return _operation;
         }
         void setOperation(int o) {
+            _flags = 0;
+            _version = 0;
             _operation = o;
         }
         char _data[4];
@@ -241,7 +245,7 @@ struct OP_GETMORE : public MSGHEADER {
         bool valid(){
             if ( len <= 0 || len > ( 1024 * 1024 * 10 ) )
                 return false;
-            if ( _operation < 0 || _operation > 100000 )
+            if ( _operation < 0 || _operation > 30000 )
                 return false;
             return true;
         }
diff --git a/util/mmap.h b/util/mmap.h
index 826c8b07b82..eca6db811fb 100644
--- a/util/mmap.h
+++ b/util/mmap.h
@@ -136,10 +136,12 @@ namespace mongo {
         };
 
         MemoryMappedFile();
+
         ~MemoryMappedFile() {
-            destroyed();
+            destroyed(); // cleans up from the master list of mmaps
             close();
         }
+
         void close();
 
         void* testGetCopyOnWriteView();
@@ -171,15 +173,15 @@ namespace mongo {
         void flush(bool sync);
         virtual Flushable * prepareFlush();
 
-        /*void* viewOfs() {
-            return view;
-        }*/
-
         long shortLength() const { return (long) len; }
         unsigned long long length() const { return len; }
         string filename() const { return _filename; }
 
+#if defined(_DURABLE) && defined(_DEBUG)
+        static void* getWriteViewFor(void *ptr);
+#endif
+
     private:
         static void updateLength( const char *filename, unsigned long long &length );
 
@@ -192,7 +194,9 @@ namespace mongo {
 #ifdef _WIN32
         boost::shared_ptr<mutex> _flushMutex;
 #endif
-
+#if defined(_DURABLE)
+        void *writeView;
+#endif
     protected:
         // only posix mmap implementations will support this
         virtual void _lock();
diff --git a/util/mmap_win.cpp b/util/mmap_win.cpp
index a21e7505eb8..5901804da67 100644
--- a/util/mmap_win.cpp
+++ b/util/mmap_win.cpp
@@ -22,8 +22,11 @@
 
 namespace mongo {
 
+    static map<void *, MemoryMappedFile*> viewToWriteable;
+    static mutex viewToWriteableMutex("viewToWriteableMutex");
+
     MemoryMappedFile::MemoryMappedFile()
-        : _flushMutex(new mutex("flushMutex"))
+        : _flushMutex(new mutex("flushMutex")), _filename("??")
     {
         fd = 0;
         maphandle = 0;
@@ -33,8 +36,17 @@ namespace mongo {
     }
 
     void MemoryMappedFile::close() {
-        if ( view )
+        //log() << "dur mmap close " << filename() << endl;
+        if ( view ) {
+            {
+                mutex::scoped_lock lk(viewToWriteableMutex);
+                viewToWriteable.erase(view);
+            }
             UnmapViewOfFile(view);
+#if defined(_DURABLE)
+            UnmapViewOfFile(writeView);
+#endif
+        }
         view = 0;
         if ( maphandle )
             CloseHandle(maphandle);
@@ -61,6 +73,9 @@ namespace mongo {
     }
 
     void* MemoryMappedFile::map(const char *filenameIn, unsigned long long &length, int options) {
+#if defined(_DURABLE)
+        options |= READONLY;
+#endif
        _filename = filenameIn;
        /* big hack here: Babble uses db names with colons.  doesn't seem to work on windows.  temporary perhaps. */
        char filename[256];
@@ -80,37 +95,38 @@ namespace mongo {
 
        updateLength( filename, length );
 
-       DWORD createOptions = FILE_ATTRIBUTE_NORMAL;
-       if ( options & SEQUENTIAL )
-           createOptions |= FILE_FLAG_SEQUENTIAL_SCAN;
-       DWORD rw = GENERIC_READ | GENERIC_WRITE;
-       //if ( options & READONLY )
-       //    rw = GENERIC_READ;
-
-       fd = CreateFile(
-                toNativeString(filename).c_str(),
-                rw, // desired access
-                FILE_SHARE_READ, // share mode
-                NULL, // security
-                OPEN_ALWAYS, // create disposition
-                createOptions , // flags
-                NULL); // hTempl
-       if ( fd == INVALID_HANDLE_VALUE ) {
-           log() << "Create/OpenFile failed " << filename << ' ' << GetLastError() << endl;
-           return 0;
+       {
+           DWORD createOptions = FILE_ATTRIBUTE_NORMAL;
+           if ( options & SEQUENTIAL )
+               createOptions |= FILE_FLAG_SEQUENTIAL_SCAN;
+           DWORD rw = GENERIC_READ | GENERIC_WRITE;
+           fd = CreateFile(
+                    toNativeString(filename).c_str(),
+                    rw, // desired access
+                    FILE_SHARE_READ, // share mode
+                    NULL, // security
+                    OPEN_ALWAYS, // create disposition
+                    createOptions , // flags
+                    NULL); // hTempl
+           if ( fd == INVALID_HANDLE_VALUE ) {
+               log() << "Create/OpenFile failed " << filename << ' ' << GetLastError() << endl;
+               return 0;
+           }
        }
 
        mapped += length;
 
-       DWORD flProtect = PAGE_READWRITE; //(options & READONLY)?PAGE_READONLY:PAGE_READWRITE;
-       maphandle = CreateFileMapping(fd, NULL, flProtect,
-                                     length >> 32 /*maxsizehigh*/,
-                                     (unsigned) length /*maxsizelow*/,
-                                     NULL/*lpName*/);
-       if ( maphandle == NULL ) {
-           DWORD e = GetLastError(); // log() call was killing lasterror before we get to that point in the stream
-           log() << "CreateFileMapping failed " << filename << ' ' << errnoWithDescription(e) << endl;
-           return 0;
+       {
+           DWORD flProtect = PAGE_READWRITE; //(options & READONLY)?PAGE_READONLY:PAGE_READWRITE;
+           maphandle = CreateFileMapping(fd, NULL, flProtect,
+                                         length >> 32 /*maxsizehigh*/,
+                                         (unsigned) length /*maxsizelow*/,
+                                         NULL/*lpName*/);
+           if ( maphandle == NULL ) {
+               DWORD e = GetLastError(); // log() call was killing lasterror before we get to that point in the stream
+               log() << "CreateFileMapping failed " << filename << ' ' << errnoWithDescription(e) << endl;
+               return 0;
+           }
        }
 
        {
@@ -122,9 +138,58 @@ namespace mongo {
            log() << "MapViewOfFile failed " << filename << " " << errnoWithDescription(e) << endl;
        }
        len = length;
+
+#if defined(_DURABLE)
+       {
+           if( !( options & READONLY ) ) {
+               log() << "dur: not readonly view which is wrong : " << filename << endl;
+           }
+           void *p = MapViewOfFile(maphandle, FILE_MAP_ALL_ACCESS, /*f ofs hi*/0, /*f ofs lo*/ 0, /*dwNumberOfBytesToMap 0 means to eof*/0);
+           assert( p );
+           writeView = p;
+           {
+               mutex::scoped_lock lk(viewToWriteableMutex);
+               viewToWriteable[view] = this;
+           }
+           log() << filenameIn << endl;
+           log() << " ro: " << view << " - " << (void*) (((char *)view)+length) << endl;
+           log() << " w : " << writeView << " - " << (void*) (((char *)writeView)+length) << endl;
+       }
+#endif
+
        return view;
    }
 
+#if defined(_DURABLE) && defined(_DEBUG)
+    void* MemoryMappedFile::getWriteViewFor(void *p) {
+        mutex::scoped_lock lk(viewToWriteableMutex);
+        std::map< void*, MemoryMappedFile* >::iterator i =
+            viewToWriteable.upper_bound(((char *)p)+1);
+        i--;
+        assert( i != viewToWriteable.end() );
+        MemoryMappedFile *mmf = i->second;
+        assert( mmf );
+
+        size_t ofs = ((char *)p) - ((char*)mmf->view);
+
+        if( ofs >= mmf->len ) {
+            log() << "getWriteViewFor error? " << p << endl;
+            for( std::map<void*,MemoryMappedFile*>::iterator i = viewToWriteable.begin(); i != viewToWriteable.end(); i++ ) {
+                char *wl = (char *) i->second->writeView;
+                char *wh = wl + i->second->length();
+                if( p >= wl && p < wh ) {
+                    log() << "dur ERROR p " << p << " is already in the writable view of " << i->second->filename() << endl;
+                    //wassert(false);
+                    // could do this:
+                    return p;
+                }
+            }
+            assert( ofs < mmf->len ); // did you call writing() with a pointer that isn't into a datafile?
+        }
+        return ((char *)mmf->writeView) + ofs;
+    }
+#endif
+
    class WindowsFlushable : public MemoryMappedFile::Flushable {
    public:
        WindowsFlushable( void * view , HANDLE fd , string filename , boost::shared_ptr<mutex> flushMutex )