-rw-r--r--  db/btree.cpp                 |  51
-rw-r--r--  db/btree.h                   |   5
-rw-r--r--  db/cap.cpp                   |  46
-rw-r--r--  db/clientcursor.h            |  19
-rw-r--r--  db/dbcommands.cpp            |  24
-rw-r--r--  db/dbcommands_admin.cpp      |   4
-rw-r--r--  db/diskloc.h                 |   2
-rw-r--r--  db/dur.cpp                   |  32
-rw-r--r--  db/dur.h                     |  68
-rw-r--r--  db/namespace.cpp             |  53
-rw-r--r--  db/namespace.h               |  21
-rw-r--r--  db/pdfile.cpp                | 102
-rw-r--r--  db/pdfile.h                  |  20
-rw-r--r--  db/query.cpp                 |   2
-rw-r--r--  db/rec.h                     |   9
-rw-r--r--  db/repl/manager.cpp          |   7
-rw-r--r--  db/repl/rs.h                 |   2
-rw-r--r--  db/repl/rs_initialsync.cpp   |   2
-rw-r--r--  db/repl/rs_rollback.cpp      |   2
-rwxr-xr-x  dbtests/mmaptests.cpp        |   3
-rw-r--r--  dbtests/namespacetests.cpp   |  10
-rw-r--r--  dbtests/test.vcxproj         |   3
-rwxr-xr-x  dbtests/test.vcxproj.filters |   3
-rw-r--r--  s/d_split.cpp                |   4
-rw-r--r--  util/hashtab.h               |  28
-rw-r--r--  util/mmap.h                  |  16
-rw-r--r--  util/mmap_win.cpp            | 123
27 files changed, 429 insertions, 232 deletions
diff --git a/db/btree.cpp b/db/btree.cpp
index 1528951a047..43b155aa16d 100644
--- a/db/btree.cpp
+++ b/db/btree.cpp
@@ -31,6 +31,12 @@ namespace mongo {
#define VERIFYTHISLOC dassert( thisLoc.btree() == this );
+ BtreeBucket* DiskLoc::btreemod() const {
+ assert( _a != -1 );
+ BtreeBucket *b = (BtreeBucket*) btreeStore->get(*this, BucketSize);
+ return dur::writing(b);
+ }
+
KeyNode::KeyNode(const BucketBasics& bb, const _KeyNode &k) :
prevChildBucket(k.prevChildBucket),
recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs())
@@ -50,8 +56,8 @@ namespace mongo {
/* BucketBasics --------------------------------------------------- */
inline void BucketBasics::modified(const DiskLoc& thisLoc) {
- VERIFYTHISLOC
- btreeStore->modified(thisLoc);
+// VERIFYTHISLOC
+// btreeStore->modified(thisLoc);
}
int BucketBasics::Size() const {
@@ -222,6 +228,7 @@ namespace mongo {
the keynodes grow from the front.
*/
inline int BucketBasics::_alloc(int bytes) {
+ dur::assertWriting(this);
topSize += bytes;
emptySize -= bytes;
int ofs = totalDataSize() - topSize;
@@ -284,7 +291,6 @@ namespace mongo {
/* insert a key in a bucket with no complexity -- no splits required */
bool BucketBasics::basicInsert(const DiskLoc& thisLoc, int &keypos, const DiskLoc& recordLoc, const BSONObj& key, const Ordering &order) {
- modified(thisLoc);
assert( keypos >= 0 && keypos <= n );
int bytesNeeded = key.objsize() + sizeof(_KeyNode);
if ( bytesNeeded > emptySize ) {
@@ -294,13 +300,16 @@ namespace mongo {
}
for ( int j = n; j > keypos; j-- ) // make room
k(j) = k(j-1);
- n++;
- emptySize -= sizeof(_KeyNode);
- _KeyNode& kn = k(keypos);
+
+ BucketBasics *b = this; // dur::writing(this);
+
+ b->n++;
+ b->emptySize -= sizeof(_KeyNode);
+ _KeyNode& kn = b->k(keypos);
kn.prevChildBucket.Null();
kn.recordLoc = recordLoc;
- kn.setKeyDataOfs((short) _alloc(key.objsize()) );
- char *p = dataAt(kn.keyDataOfs());
+ kn.setKeyDataOfs((short) b->_alloc(key.objsize()) );
+ char *p = b->dataAt(kn.keyDataOfs());
memcpy(p, key.objdata(), key.objsize());
return true;
}
@@ -633,7 +642,7 @@ found:
bool found;
DiskLoc loc = locate(id, thisLoc, key, Ordering::make(id.keyPattern()), pos, found, recordLoc, 1);
if ( found ) {
- loc.btree()->delKeyAtPos(loc, id, pos);
+ loc.btreemod()->delKeyAtPos(loc, id, pos);
return true;
}
return false;
@@ -661,16 +670,10 @@ found:
fix(thisLoc, k(i).prevChildBucket);
}
- /* insert a key in this bucket, splitting if necessary.
- keypos - where to insert the key in range 0..n. 0=make leftmost, n=make rightmost.
- NOTE this function may free some data, and as a result the value passed for keypos may
- be invalid after calling insertHere()
- */
- void BtreeBucket::insertHere(DiskLoc thisLoc, int keypos,
+ void BtreeBucket::_insertHere(DiskLoc thisLoc, int keypos,
DiskLoc recordLoc, const BSONObj& key, const Ordering& order,
DiskLoc lchild, DiskLoc rchild, IndexDetails& idx)
{
- modified(thisLoc);
if ( insert_debug )
out() << " " << thisLoc.toString() << ".insertHere " << key.toString() << '/' << recordLoc.toString() << ' '
<< lchild.toString() << ' ' << rchild.toString() << " keypos:" << keypos << endl;
@@ -806,6 +809,20 @@ found:
out() << " split end " << hex << thisLoc.getOfs() << dec << endl;
}
+ /* insert a key in this bucket, splitting if necessary.
+ keypos - where to insert the key in range 0..n. 0=make leftmost, n=make rightmost.
+ NOTE this function may free some data, and as a result the value passed for keypos may
+ be invalid after calling insertHere()
+ */
+ void BtreeBucket::insertHere(DiskLoc thisLoc, int keypos,
+ DiskLoc recordLoc, const BSONObj& key, const Ordering& order,
+ DiskLoc lchild, DiskLoc rchild, IndexDetails& idx)
+ {
+ modified(thisLoc);
+ BtreeBucket *b = dur::writing(this);
+ b->_insertHere(thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx);
+ }
+
/* start a new index off, empty */
DiskLoc BtreeBucket::addBucket(IndexDetails& id) {
DiskLoc loc = btreeStore->insert(id.indexNamespace().c_str(), 0, BucketSize, true);
@@ -1247,7 +1264,7 @@ namespace mongo {
while( 1 ) {
if( loc.btree()->tempNext().isNull() ) {
// only 1 bucket at this level. we are done.
- idx.head = loc;
+ dur::writingDiskLoc(idx.head) = loc;
break;
}
levels++;
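
Note: the btree changes above funnel all bucket mutation through dur::writing()
before any bytes are touched (see btreemod() and insertHere -> _insertHere). A
minimal sketch of that idiom, with stand-in types rather than the real headers,
and writing() modeled as the identity function it is in a non-durable build:

    #include <cassert>
    #include <cstdio>

    namespace dur {
        // non-durable build: declaring write intent is a no-op;
        // a durable build would return a pointer into a writable view
        template <typename T> inline T* writing(T *x) { return x; }
    }

    struct Bucket { int n; int emptySize; };

    void insertKey(Bucket *bucket) {
        // mutate only through the pointer returned by writing(),
        // mirroring BtreeBucket::insertHere -> _insertHere above
        Bucket *b = dur::writing(bucket);
        b->n++;
        b->emptySize -= 4;
    }

    int main() {
        Bucket bk = { 0, 100 };
        insertKey(&bk);
        assert( bk.n == 1 && bk.emptySize == 96 );
        printf("n=%d emptySize=%d\n", bk.n, bk.emptySize);
        return 0;
    }
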
diff --git a/db/btree.h b/db/btree.h
index bb31081b00d..63b1b1aa148 100644
--- a/db/btree.h
+++ b/db/btree.h
@@ -253,9 +253,14 @@ namespace mongo {
return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key;
}
static BtreeBucket* allocTemp(); /* caller must release with free() */
+
+ void _insertHere(DiskLoc thisLoc, int keypos,
+ DiskLoc recordLoc, const BSONObj& key, const Ordering &order,
+ DiskLoc lchild, DiskLoc rchild, IndexDetails&);
void insertHere(DiskLoc thisLoc, int keypos,
DiskLoc recordLoc, const BSONObj& key, const Ordering &order,
DiskLoc lchild, DiskLoc rchild, IndexDetails&);
+
int _insert(DiskLoc thisLoc, DiskLoc recordLoc,
const BSONObj& key, const Ordering &order, bool dupsAllowed,
DiskLoc lChild, DiskLoc rChild, IndexDetails&);
diff --git a/db/cap.cpp b/db/cap.cpp
index e80f27eb873..c36e57c1b58 100644
--- a/db/cap.cpp
+++ b/db/cap.cpp
@@ -63,7 +63,8 @@ namespace mongo {
DiskLoc i = cappedFirstDeletedInCurExtent();
for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted )
drecs.push_back( i );
- cappedFirstDeletedInCurExtent() = i;
+
+ dur::writingDiskLoc( cappedFirstDeletedInCurExtent() ) = i;
// This is the O(n^2) part.
drecs.sort();
@@ -81,7 +82,7 @@ namespace mongo {
DiskLoc b = *j;
while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) {
// a & b are adjacent. merge.
- a.drec()->lengthWithHeaders += b.drec()->lengthWithHeaders;
+ dur::writingInt( a.drec()->lengthWithHeaders ) += b.drec()->lengthWithHeaders;
j++;
if ( j == drecs.end() ) {
DEBUGGING out() << "temp: compact adddelrec2\n";
@@ -107,8 +108,8 @@ namespace mongo {
// migrate old NamespaceDetails format
assert( capped );
if ( capExtent.a() == 0 && capExtent.getOfs() == 0 ) {
- capFirstNewRecord = DiskLoc();
- capFirstNewRecord.setInvalid();
+ //capFirstNewRecord = DiskLoc();
+ capFirstNewRecord.writing().setInvalid();
// put all the DeletedRecords in cappedListOfAllDeletedRecords()
for ( int i = 1; i < Buckets; ++i ) {
DiskLoc first = deletedList[ i ];
@@ -116,14 +117,14 @@ namespace mongo {
continue;
DiskLoc last = first;
for (; !last.drec()->nextDeleted.isNull(); last = last.drec()->nextDeleted );
- last.drec()->nextDeleted = cappedListOfAllDeletedRecords();
- cappedListOfAllDeletedRecords() = first;
- deletedList[ i ] = DiskLoc();
+ last.drec()->nextDeleted.writing() = cappedListOfAllDeletedRecords();
+ cappedListOfAllDeletedRecords().writing() = first;
+ deletedList[i].writing() = DiskLoc();
}
// NOTE cappedLastDelRecLastExtent() set to DiskLoc() in above
// Last, in case we're killed before getting here
- capExtent = firstExtent;
+ capExtent.writing() = firstExtent;
}
}
@@ -145,20 +146,20 @@ namespace mongo {
// We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent
// (or DiskLoc() if new capExtent == firstExtent)
if ( capExtent == lastExtent )
- cappedLastDelRecLastExtent() = DiskLoc();
+ dur::writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc();
else {
DiskLoc i = cappedFirstDeletedInCurExtent();
for (; !i.isNull() && nextIsInCapExtent( i ); i = i.drec()->nextDeleted );
- cappedLastDelRecLastExtent() = i;
+ dur::writingDiskLoc( cappedLastDelRecLastExtent() ) = i;
}
- capExtent = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext;
+ dur::writingDiskLoc( capExtent ) = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext;
/* this isn't true if a collection has been renamed...that is ok just used for diagnostics */
//dassert( theCapExtent()->ns == ns );
theCapExtent()->assertOk();
- capFirstNewRecord = DiskLoc();
+ dur::writingDiskLoc( capFirstNewRecord ) = DiskLoc();
}
DiskLoc NamespaceDetails::__capAlloc( int len ) {
@@ -177,10 +178,10 @@ namespace mongo {
/* unlink ourself from the deleted list */
if ( !ret.isNull() ) {
if ( prev.isNull() )
- cappedListOfAllDeletedRecords() = ret.drec()->nextDeleted;
+ cappedListOfAllDeletedRecords().writing() = ret.drec()->nextDeleted;
else
- prev.drec()->nextDeleted = ret.drec()->nextDeleted;
- ret.drec()->nextDeleted.setInvalid(); // defensive.
+ prev.drec()->nextDeleted.writing() = ret.drec()->nextDeleted;
+ ret.drec()->nextDeleted.writing().setInvalid(); // defensive.
assert( ret.drec()->extentOfs < ret.getOfs() );
}
@@ -190,7 +191,7 @@ namespace mongo {
DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) {
// signal done allocating new extents.
if ( !cappedLastDelRecLastExtent().isValid() )
- cappedLastDelRecLastExtent() = DiskLoc();
+ dur::writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc();
assert( len < 400000000 );
int passes = 0;
@@ -209,7 +210,7 @@ namespace mongo {
theCapExtent()->assertOk();
DiskLoc firstEmptyExtent;
while ( 1 ) {
- if ( nrecords < max ) {
+ if ( stats.nrecords < max ) {
loc = __capAlloc( len );
if ( !loc.isNull() )
break;
@@ -218,8 +219,9 @@ namespace mongo {
// If on first iteration through extents, don't delete anything.
if ( !capFirstNewRecord.isValid() ) {
advanceCapExtent( ns );
+
if ( capExtent != firstExtent )
- capFirstNewRecord.setInvalid();
+ capFirstNewRecord.writing().setInvalid();
// else signal done with first iteration through extents.
continue;
}
@@ -248,14 +250,14 @@ namespace mongo {
compact();
if( ++passes > maxPasses ) {
log() << "passes ns:" << ns << " len:" << len << " maxPasses: " << maxPasses << '\n';
- log() << "passes max:" << max << " nrecords:" << nrecords << " datasize: " << datasize << endl;
+ log() << "passes max:" << max << " nrecords:" << stats.nrecords << " datasize: " << stats.datasize << endl;
massert( 10345 , "passes >= maxPasses in capped collection alloc", false );
}
}
// Remember first record allocated on this iteration through capExtent.
if ( capFirstNewRecord.isValid() && capFirstNewRecord.isNull() )
- capFirstNewRecord = loc;
+ dur::writingDiskLoc(capFirstNewRecord) = loc;
return loc;
}
@@ -301,7 +303,7 @@ namespace mongo {
}
}
- uassert( 13415, "emptying the collection is not allowed", nrecords > 1 );
+ uassert( 13415, "emptying the collection is not allowed", stats.nrecords > 1 );
if ( !capLooped() ) {
theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true);
@@ -363,7 +365,7 @@ namespace mongo {
// preserve firstExtent/lastExtent
capExtent = firstExtent;
- datasize = nrecords = 0;
+ stats.datasize = stats.nrecords = 0;
// lastExtentSize preserve
// nIndexes preserve 0
// capped preserve true
diff --git a/db/clientcursor.h b/db/clientcursor.h
index 729b5a5b412..08e3311d1a7 100644
--- a/db/clientcursor.h
+++ b/db/clientcursor.h
@@ -292,33 +292,22 @@ namespace mongo {
void storeOpForSlave( DiskLoc last );
void updateSlaveLocation( CurOp& curop );
- unsigned idleTime(){
- return _idleAgeMillis;
- }
+ unsigned idleTime() const { return _idleAgeMillis; }
static void idleTimeReport(unsigned millis);
private:
// cursors normally time out after an inactivity period to prevent excess memory use
// setting this prevents timeout of the cursor in question.
- void noTimeout() {
- _pinValue++;
- }
+ void noTimeout() { _pinValue++; }
- multimap<DiskLoc, ClientCursor*>& byLoc() {
- return _db->ccByLoc;
- }
+ multimap<DiskLoc, ClientCursor*>& byLoc() { return _db->ccByLoc; }
public:
- void setDoingDeletes( bool doingDeletes ){
- _doingDeletes = doingDeletes;
- }
+ void setDoingDeletes( bool doingDeletes ) { _doingDeletes = doingDeletes; }
static void appendStats( BSONObjBuilder& result );
-
static unsigned numCursors() { return clientCursorsById.size(); }
-
static void informAboutToDeleteBucket(const DiskLoc& b);
static void aboutToDelete(const DiskLoc& dl);
-
static void find( const string& ns , set<CursorId>& all );
};
diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp
index 8bc79d17b0f..28f0ebb6705 100644
--- a/db/dbcommands.cpp
+++ b/db/dbcommands.cpp
@@ -954,7 +954,7 @@ namespace mongo {
Client::Context ctx( ns );
NamespaceDetails *d = nsdetails(ns.c_str());
- if ( ! d || d->nrecords == 0 ){
+ if ( ! d || d->stats.nrecords == 0 ){
result.appendNumber( "size" , 0 );
result.appendNumber( "numObjects" , 0 );
result.append( "millis" , timer.millis() );
@@ -966,8 +966,8 @@ namespace mongo {
shared_ptr<Cursor> c;
if ( min.isEmpty() && max.isEmpty() ) {
if ( estimate ){
- result.appendNumber( "size" , d->datasize );
- result.appendNumber( "numObjects" , d->nrecords );
+ result.appendNumber( "size" , d->stats.datasize );
+ result.appendNumber( "numObjects" , d->stats.nrecords );
result.append( "millis" , timer.millis() );
return 1;
}
@@ -985,7 +985,7 @@ namespace mongo {
c.reset( new BtreeCursor( d, d->idxNo(*idx), *idx, min, max, false, 1 ) );
}
- long long avgObjSize = d->datasize / d->nrecords;
+ long long avgObjSize = d->stats.datasize / d->stats.nrecords;
long long maxSize = jsobj["maxSize"].numberLong();
long long maxObjects = jsobj["maxObjects"].numberLong();
@@ -1043,9 +1043,9 @@ namespace mongo {
log() << "error: have index [" << collNS << "] but no NamespaceDetails" << endl;
continue;
}
- totalSize += mine->datasize;
+ totalSize += mine->stats.datasize;
if ( details )
- details->appendNumber( d.indexName() , mine->datasize / scale );
+ details->appendNumber( d.indexName() , mine->stats.datasize / scale );
}
return totalSize;
}
@@ -1085,10 +1085,10 @@ namespace mongo {
return false;
}
- long long size = nsd->datasize / scale;
- result.appendNumber( "count" , nsd->nrecords );
+ long long size = nsd->stats.datasize / scale;
+ result.appendNumber( "count" , nsd->stats.nrecords );
result.appendNumber( "size" , size );
- result.append ( "avgObjSize" , double(size) / double(nsd->nrecords) );
+ result.append ( "avgObjSize" , double(size) / double(nsd->stats.nrecords) );
int numExtents;
result.appendNumber( "storageSize" , nsd->storageSize( &numExtents ) / scale );
result.append( "numExtents" , numExtents );
@@ -1143,8 +1143,8 @@ namespace mongo {
}
ncollections += 1;
- objects += nsd->nrecords;
- size += nsd->datasize;
+ objects += nsd->stats.nrecords;
+ size += nsd->stats.datasize;
int temp;
storageSize += nsd->storageSize( &temp );
@@ -1191,7 +1191,7 @@ namespace mongo {
string toNs = dbname + "." + to;
NamespaceDetails *nsd = nsdetails( fromNs.c_str() );
massert( 10301 , "source collection " + fromNs + " does not exist", nsd );
- long long excessSize = nsd->datasize - size * 2; // datasize and extentSize can't be compared exactly, so add some padding to 'size'
+ long long excessSize = nsd->stats.datasize - size * 2; // datasize and extentSize can't be compared exactly, so add some padding to 'size'
DiskLoc extent = nsd->firstExtent;
for( ; excessSize > extent.ext()->length && extent != nsd->lastExtent; extent = extent.ext()->xnext ) {
excessSize -= extent.ext()->length;
diff --git a/db/dbcommands_admin.cpp b/db/dbcommands_admin.cpp
index b7ad9602c5f..a37bb4f6ce1 100644
--- a/db/dbcommands_admin.cpp
+++ b/db/dbcommands_admin.cpp
@@ -128,7 +128,7 @@ namespace mongo {
ss << " extent asserted ";
}
- ss << " datasize?:" << d->datasize << " nrecords?:" << d->nrecords << " lastExtentSize:" << d->lastExtentSize << '\n';
+ ss << " datasize?:" << d->stats.datasize << " nrecords?:" << d->stats.nrecords << " lastExtentSize:" << d->lastExtentSize << '\n';
ss << " padding:" << d->paddingFactor << '\n';
try {
@@ -175,7 +175,7 @@ namespace mongo {
else ss << " (OK)";
ss << '\n';
}
- ss << " " << n << " objects found, nobj:" << d->nrecords << '\n';
+ ss << " " << n << " objects found, nobj:" << d->stats.nrecords << '\n';
ss << " " << len << " bytes data w/headers\n";
ss << " " << nlen << " bytes data wout/headers\n";
}
diff --git a/db/diskloc.h b/db/diskloc.h
index 1f06fc58778..0f675c25d90 100644
--- a/db/diskloc.h
+++ b/db/diskloc.h
@@ -127,6 +127,8 @@ namespace mongo {
return compare(b) < 0;
}
+ DiskLoc& writing(); // see dur.h
+
/* Get the "thing" associated with this disk location.
it is assumed the object is what you say it is -- you must assure that
(think of this as an unchecked type cast)
diff --git a/db/dur.cpp b/db/dur.cpp
new file mode 100644
index 00000000000..3d63b5003f4
--- /dev/null
+++ b/db/dur.cpp
@@ -0,0 +1,32 @@
+// @file dur.cpp
+
+#include "pch.h"
+#include "dur.h"
+#include "../util/mmap.h"
+
+namespace mongo {
+
+ namespace dur {
+
+#if defined(_DEBUG) && defined(_DURABLE)
+
+ void* writingPtr(void *x, size_t len) {
+ cout << "TEMP writing " << x << ' ' << len << endl;
+ return MemoryMappedFile::getWriteViewFor(x);
+ }
+
+ void assertReading(void *p) {
+ assert( MemoryMappedFile::getWriteViewFor(p) != p );
+ }
+ void assertWriting(void *p) {
+ // todo:
+ //assert( MemoryMappedFile::getWriteViewFor(p) == p );
+ }
+
+#endif
+
+ }
+
+}
diff --git a/db/dur.h b/db/dur.h
new file mode 100644
index 00000000000..c139bedb59a
--- /dev/null
+++ b/db/dur.h
@@ -0,0 +1,68 @@
+// @file dur.h durability support
+
+#pragma once
+
+#include "diskloc.h"
+
+namespace mongo {
+
+ namespace dur {
+
+ /** call writing...() to declare "I'm about to write to x and it should be logged for redo."
+
+ failure to call writing...() is checked in _DEBUG mode by using a read-only mapped view
+ (i.e., you'll segfault if you don't...)
+ */
+
+
+#if !defined(_DURABLE)
+
+ inline void* writingPtr(void *x, size_t len) { return x; }
+ inline DiskLoc& writingDiskLoc(DiskLoc& d) { return d; }
+ inline int& writingInt(int& d) { return d; }
+ template <typename T> inline T* writing(T *x) { return x; }
+ inline void assertReading(void *p) { }
+ inline void assertWriting(void *p) { }
+
+#else
+
+ void* writingPtr(void *x, size_t len);
+
+ inline DiskLoc& writingDiskLoc(DiskLoc& d) {
+#if defined(_DEBUG)
+ return *((DiskLoc*) writingPtr(&d, sizeof(d)));
+#else
+ return d;
+#endif
+ }
+
+ inline int& writingInt(int& d) {
+#if defined(_DEBUG)
+ return *((int*) writingPtr(&d, sizeof(d)));
+#else
+ return d;
+#endif
+ }
+
+ template <typename T>
+ inline
+ T* writing(T *x) {
+#if defined(_DEBUG)
+ return (T*) writingPtr(x, sizeof(T));
+#else
+ return x;
+#endif
+ }
+
+ void assertReading(void *p);
+ void assertWriting(void *p);
+
+#endif
+
+ }
+
+ inline DiskLoc& DiskLoc::writing() {
+ return dur::writingDiskLoc(*this);
+ }
+
+}
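
Note: a hedged usage sketch of the three declaration forms dur.h exposes
(writingDiskLoc, writingInt, writing<T>), compiled as in a non-_DURABLE build
where each is the identity; DiskLoc and Details here are illustrative stand-ins,
not the real types:

    #include <cstdio>

    struct DiskLoc { int a, ofs; };

    namespace dur {
        inline DiskLoc& writingDiskLoc(DiskLoc& d) { return d; }
        inline int& writingInt(int& d) { return d; }
        template <typename T> inline T* writing(T *x) { return x; }
    }

    struct Details { DiskLoc capExtent; int lastExtentSize; };

    int main() {
        Details d = { { 0, 0 }, 0 };
        DiskLoc next = { 1, 4096 };
        dur::writingDiskLoc(d.capExtent) = next;   // declare intent, then assign
        dur::writingInt(d.lastExtentSize) += 8192; // read-modify-write through the ref
        Details *w = dur::writing(&d);             // whole-struct write intent
        w->lastExtentSize -= 16;
        printf("capExtent %d:%d lastExtentSize %d\n",
               d.capExtent.a, d.capExtent.ofs, d.lastExtentSize);
        return 0;
    }
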
diff --git a/db/namespace.cpp b/db/namespace.cpp
index 682300a7dd8..fcdb64d0a94 100644
--- a/db/namespace.cpp
+++ b/db/namespace.cpp
@@ -47,7 +47,7 @@ namespace mongo {
NamespaceDetails::NamespaceDetails( const DiskLoc &loc, bool _capped ) {
/* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */
firstExtent = lastExtent = capExtent = loc;
- datasize = nrecords = 0;
+ stats.datasize = stats.nrecords = 0;
lastExtentSize = 0;
nIndexes = 0;
capped = _capped;
@@ -181,41 +181,43 @@ namespace mongo {
}
void NamespaceDetails::addDeletedRec(DeletedRecord *d, DiskLoc dloc) {
+ dur::assertReading(this);
BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) );
+ dassert( dloc.drec() == d );
+ DeletedRecord *dold = d;
+ d = dur::writing(d);
{
// defensive code: try to make us notice if we reference a deleted record
(unsigned&) (((Record *) d)->data) = 0xeeeeeeee;
}
- dassert( dloc.drec() == d );
- DEBUGGING out() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl;
+ DEBUGGING log() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl;
if ( capped ) {
if ( !cappedLastDelRecLastExtent().isValid() ) {
// Initial extent allocation. Insert at end.
d->nextDeleted = DiskLoc();
if ( cappedListOfAllDeletedRecords().isNull() )
- cappedListOfAllDeletedRecords() = dloc;
+ dur::writingDiskLoc( cappedListOfAllDeletedRecords() ) = dloc;
else {
DiskLoc i = cappedListOfAllDeletedRecords();
- for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted );
- i.drec()->nextDeleted = dloc;
+ for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted )
+ ;
+ i.drec()->nextDeleted.writing() = dloc;
}
} else {
d->nextDeleted = cappedFirstDeletedInCurExtent();
- cappedFirstDeletedInCurExtent() = dloc;
+ dur::writingDiskLoc( cappedFirstDeletedInCurExtent() ) = dloc;
// always compact() after this so order doesn't matter
}
} else {
int b = bucket(d->lengthWithHeaders);
DiskLoc& list = deletedList[b];
DiskLoc oldHead = list;
- list = dloc;
+ dur::writingDiskLoc(list) = dloc;
d->nextDeleted = oldHead;
}
}
- /*
- lenToAlloc is WITH header
- */
+ // lenToAlloc is WITH header
DiskLoc NamespaceDetails::alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc) {
lenToAlloc = (lenToAlloc + 3) & 0xfffffffc;
DiskLoc loc = _alloc(ns, lenToAlloc);
@@ -223,6 +225,7 @@ namespace mongo {
return loc;
DeletedRecord *r = loc.drec();
+ r = dur::writing(r);
/* note we want to grab from the front so our next pointers on disk tend
to go in a forward direction which is important for performance. */
@@ -247,9 +250,10 @@ namespace mongo {
DiskLoc newDelLoc = loc;
newDelLoc.inc(lenToAlloc);
DeletedRecord *newDel = DataFileMgr::makeDeletedRecord(newDelLoc, left);
- newDel->extentOfs = r->extentOfs;
- newDel->lengthWithHeaders = left;
- newDel->nextDeleted.Null();
+ DeletedRecord *newDelW = dur::writing(newDel);
+ newDelW->extentOfs = r->extentOfs;
+ newDelW->lengthWithHeaders = left;
+ newDelW->nextDeleted.Null();
addDeletedRec(newDel, newDelLoc);
@@ -323,8 +327,8 @@ namespace mongo {
/* unlink ourself from the deleted list */
{
- DeletedRecord *bmr = bestmatch.drec();
- *bestprev = bmr->nextDeleted;
+ DeletedRecord *bmr = dur::writing(bestmatch.drec());
+ *dur::writing(bestprev) = bmr->nextDeleted;
bmr->nextDeleted.setInvalid(); // defensive.
assert(bmr->extentOfs < bestmatch.getOfs());
}
@@ -394,6 +398,21 @@ namespace mongo {
return cappedAlloc(ns,len);
}
+ void NamespaceIndex::kill_ns(const char *ns) {
+ if ( !ht )
+ return;
+ Namespace n(ns);
+ ht->kill(n);
+
+ for( int i = 0; i<=1; i++ ) {
+ try {
+ Namespace extra(n.extraName(i).c_str());
+ ht->kill(extra);
+ }
+ catch(DBException&) { }
+ }
+ }
+
/* extra space for indexes when more than 10 */
NamespaceDetails::Extra* NamespaceIndex::newExtra(const char *ns, int i, NamespaceDetails *d) {
assert( i >= 0 && i <= 1 );
@@ -440,7 +459,7 @@ namespace mongo {
id = &idx(nIndexes,false);
}
- nIndexes++;
+ (*dur::writing(&nIndexes))++;
if ( resetTransient )
NamespaceDetailsTransient::get_w(thisns).addedIndex();
return *id;
diff --git a/db/namespace.h b/db/namespace.h
index 4f6cde9ac8e..7479a21da24 100644
--- a/db/namespace.h
+++ b/db/namespace.h
@@ -125,8 +125,10 @@ namespace mongo {
*/
DiskLoc deletedList[Buckets];
// ofs 168 (8 byte aligned)
- long long datasize;
- long long nrecords;
+ struct Stats {
+ long long datasize; // datasize and nrecords MUST be adjacent; the code assumes this!
+ long long nrecords;
+ } stats;
int lastExtentSize;
int nIndexes;
private:
@@ -503,20 +505,7 @@ namespace mongo {
return d;
}
- void kill_ns(const char *ns) {
- if ( !ht )
- return;
- Namespace n(ns);
- ht->kill(n);
-
- for( int i = 0; i<=1; i++ ) {
- try {
- Namespace extra(n.extraName(i).c_str());
- ht->kill(extra);
- }
- catch(DBException&) { }
- }
- }
+ void kill_ns(const char *ns);
bool find(const char *ns, DiskLoc& loc) {
NamespaceDetails *l = details(ns);
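
Note: the adjacency requirement on Stats pays off in the pdfile.cpp hunks below,
where a single dur::writing(&d->stats) declaration covers both counters at once.
A sketch with stand-in types (writing() again modeled as the identity it is in
a non-durable build):

    #include <cstdio>

    namespace dur { template <typename T> inline T* writing(T *x) { return x; } }

    struct Stats { long long datasize; long long nrecords; };

    int main() {
        Stats stats = { 0, 0 };
        // one declaration spans sizeof(Stats), i.e. both adjacent fields
        Stats *s = dur::writing(&stats);
        s->datasize += 128;
        s->nrecords++;
        printf("datasize=%lld nrecords=%lld\n", stats.datasize, stats.nrecords);
        return 0;
    }
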
diff --git a/db/pdfile.cpp b/db/pdfile.cpp
index cedcc2aafc6..8e75ce41925 100644
--- a/db/pdfile.cpp
+++ b/db/pdfile.cpp
@@ -396,24 +396,25 @@ namespace mongo {
}
void addNewExtentToNamespace(const char *ns, Extent *e, DiskLoc eloc, DiskLoc emptyLoc, bool capped) {
- DiskLoc oldExtentLoc;
NamespaceIndex *ni = nsindex(ns);
NamespaceDetails *details = ni->details(ns);
if ( details ) {
assert( !details->lastExtent.isNull() );
assert( !details->firstExtent.isNull() );
- e->xprev = details->lastExtent;
- details->lastExtent.ext()->xnext = eloc;
+ dur::writingDiskLoc(e->xprev) = details->lastExtent;
+ dur::writingDiskLoc(details->lastExtent.ext()->xnext) = eloc;
assert( !eloc.isNull() );
- details->lastExtent = eloc;
+ dur::writingDiskLoc(details->lastExtent) = eloc;
}
else {
ni->add_ns(ns, eloc, capped);
details = ni->details(ns);
}
- details->lastExtentSize = e->length;
- DEBUGGING out() << "temp: newextent adddelrec " << ns << endl;
+ {
+ NamespaceDetails *dw = dur::writing(details);
+ dw->lastExtentSize = e->length;
+ }
details->addDeletedRec(emptyLoc.drec(), emptyLoc);
}
@@ -434,11 +435,13 @@ namespace mongo {
return cc().database()->addAFile( 0, true )->createExtent(ns, approxSize, newCapped, loops+1);
}
int offset = header->unused.getOfs();
- header->unused.set( fileNo, offset + ExtentSize );
- header->unusedLength -= ExtentSize;
+
+ DataFileHeader *h = dur::writing(header);
+ h->unused.set( fileNo, offset + ExtentSize );
+ h->unusedLength -= ExtentSize;
loc.set(fileNo, offset);
Extent *e = _getExtent(loc);
- DiskLoc emptyLoc = e->init(ns, ExtentSize, fileNo, offset);
+ DiskLoc emptyLoc = dur::writing(e)->init(ns, ExtentSize, fileNo, offset);
addNewExtentToNamespace(ns, e, loc, emptyLoc, newCapped);
@@ -553,9 +556,7 @@ namespace mongo {
emptyLoc.inc( (int) (_extentData-(char*)this) );
int l = _length - (_extentData - (char *) this);
- //DeletedRecord *empty1 = (DeletedRecord *) extentData;
- DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, l);
- //assert( empty == empty1 );
+ DeletedRecord *empty = dur::writing( DataFileMgr::makeDeletedRecord(emptyLoc, l) );
empty->lengthWithHeaders = l;
empty->extentOfs = myLoc.getOfs();
return emptyLoc;
@@ -750,12 +751,11 @@ namespace mongo {
else {
DiskLoc a = freeExtents->firstExtent;
assert( a.ext()->xprev.isNull() );
- a.ext()->xprev = d->lastExtent;
- d->lastExtent.ext()->xnext = a;
- freeExtents->firstExtent = d->firstExtent;
-
- d->firstExtent.setInvalid();
- d->lastExtent.setInvalid();
+ dur::writingDiskLoc( a.ext()->xprev ) = d->lastExtent;
+ dur::writingDiskLoc( d->lastExtent.ext()->xnext ) = a;
+ dur::writingDiskLoc( freeExtents->firstExtent ) = d->firstExtent;
+ dur::writingDiskLoc( d->firstExtent ).setInvalid();
+ dur::writingDiskLoc( d->lastExtent ).setInvalid();
}
}
@@ -843,14 +843,14 @@ namespace mongo {
/* remove ourself from the record next/prev chain */
{
if ( todelete->prevOfs != DiskLoc::NullOfs )
- todelete->getPrev(dl).rec()->nextOfs = todelete->nextOfs;
+ dur::writingInt( todelete->getPrev(dl).rec()->nextOfs ) = todelete->nextOfs;
if ( todelete->nextOfs != DiskLoc::NullOfs )
- todelete->getNext(dl).rec()->prevOfs = todelete->prevOfs;
+ dur::writingInt( todelete->getNext(dl).rec()->prevOfs ) = todelete->prevOfs;
}
/* remove ourself from extent pointers */
{
- Extent *e = todelete->myExtent(dl);
+ Extent *e = dur::writing( todelete->myExtent(dl) );
if ( e->firstRecord == dl ) {
if ( todelete->nextOfs == DiskLoc::NullOfs )
e->firstRecord.Null();
@@ -867,18 +867,26 @@ namespace mongo {
/* add to the free list */
{
- d->nrecords--;
- d->datasize -= todelete->netLength();
- /* temp: if in system.indexes, don't reuse, and zero out: we want to be
- careful until validated more, as IndexDetails has pointers
- to this disk location. so an incorrectly done remove would cause
- a lot of problems.
- */
+ {
+ NamespaceDetails::Stats *s = dur::writing(&d->stats);
+ s->datasize -= todelete->netLength();
+ s->nrecords--;
+ }
+
if ( strstr(ns, ".system.indexes") ) {
- memset(todelete, 0, todelete->lengthWithHeaders);
+ /* temp: if in system.indexes, don't reuse, and zero out: we want to be
+ careful until validated more, as IndexDetails has pointers
+ to this disk location. so an incorrectly done remove would cause
+ a lot of problems.
+ */
+ memset(dur::writingPtr(todelete, todelete->lengthWithHeaders), 0, todelete->lengthWithHeaders);
}
else {
- DEV memset(todelete->data, 0, todelete->netLength()); // attempt to notice invalid reuse.
+ DEV {
+ unsigned long long *p = (unsigned long long *) todelete->data;
+ *dur::writing(p) = 0;
+ //DEV memset(todelete->data, 0, todelete->netLength()); // attempt to notice invalid reuse.
+ }
d->addDeletedRec((DeletedRecord*)todelete, dl);
}
}
@@ -1082,7 +1090,7 @@ namespace mongo {
bool dropDups = idx.dropDups() || inDBRepair;
BSONObj order = idx.keyPattern();
- idx.head.Null();
+ dur::writingDiskLoc(idx.head).Null();
if ( logLevel > 1 ) printMemInfo( "before index start" );
@@ -1090,9 +1098,9 @@ namespace mongo {
unsigned long long n = 0;
shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
BSONObjExternalSorter sorter(order);
- sorter.hintNumObjects( d->nrecords );
+ sorter.hintNumObjects( d->stats.nrecords );
unsigned long long nkeys = 0;
- ProgressMeterHolder pm( op->setMessage( "index: (1/3) external sort" , d->nrecords , 10 ) );
+ ProgressMeterHolder pm( op->setMessage( "index: (1/3) external sort" , d->stats.nrecords , 10 ) );
while ( c->ok() ) {
BSONObj o = c->current();
DiskLoc loc = c->currLoc();
@@ -1180,7 +1188,7 @@ namespace mongo {
bool dupsAllowed = !idx.unique();
bool dropDups = idx.dropDups();
- ProgressMeter& progress = cc().curop()->setMessage( "bg index build" , d->nrecords );
+ ProgressMeter& progress = cc().curop()->setMessage( "bg index build" , d->stats.nrecords );
unsigned long long n = 0;
auto_ptr<ClientCursor> cc;
@@ -1333,7 +1341,7 @@ namespace mongo {
if ( d == 0 || (d->flags & NamespaceDetails::Flag_HaveIdIndex) )
return;
- d->flags |= NamespaceDetails::Flag_HaveIdIndex;
+ *dur::writing(&d->flags) |= NamespaceDetails::Flag_HaveIdIndex;
{
NamespaceDetails::IndexIterator i = d->ii();
@@ -1532,6 +1540,7 @@ namespace mongo {
Record *r = loc.rec();
assert( r->lengthWithHeaders >= lenWHdr );
+ r = (Record*) dur::writingPtr(r, lenWHdr);
if( addID ) {
/* a little effort was made here to avoid a double copy when we add an ID */
((int&)*r->data) = *((int*) obuf) + newId->size();
@@ -1542,22 +1551,25 @@ namespace mongo {
if( obuf )
memcpy(r->data, obuf, len);
}
- Extent *e = r->myExtent(loc);
+ Extent *e = dur::writing(r->myExtent(loc));
if ( e->lastRecord.isNull() ) {
e->firstRecord = e->lastRecord = loc;
r->prevOfs = r->nextOfs = DiskLoc::NullOfs;
}
else {
-
Record *oldlast = e->lastRecord.rec();
r->prevOfs = e->lastRecord.getOfs();
r->nextOfs = DiskLoc::NullOfs;
- oldlast->nextOfs = loc.getOfs();
+ dur::writing(oldlast)->nextOfs = loc.getOfs();
e->lastRecord = loc;
}
- d->nrecords++;
- d->datasize += r->netLength();
+ /* durability todo : this could be a bit annoying / slow to record constantly */
+ {
+ NamespaceDetails::Stats *s = dur::writing(&d->stats);
+ s->datasize += r->netLength();
+ s->nrecords++;
+ }
// we don't bother clearing those stats for the god tables - also god is true when adding a btree bucket
if ( !god )
@@ -1578,7 +1590,7 @@ namespace mongo {
int idxNo = tableToIndex->nIndexes;
IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str(), !background); // clear transient info caches so they refresh; increments nIndexes
- idx.info = loc;
+ dur::writingDiskLoc(idx.info) = loc;
try {
buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo, background);
} catch( DBException& e ) {
@@ -1669,8 +1681,12 @@ namespace mongo {
e->lastRecord = loc;
}
- d->nrecords++;
- d->datasize += r->netLength();
+ /* todo: don't update for oplog? seems wasteful. */
+ {
+ NamespaceDetails::Stats *s = dur::writing(&d->stats);
+ s->datasize += r->netLength();
+ s->nrecords++;
+ }
return r;
}
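
Note: besides the typed helpers, deleteRecord above uses the byte-range form
writingPtr(p, len) so that one declaration can cover a whole record before the
memset. A sketch with stand-in types (writingPtr modeled as identity, as in a
non-durable build):

    #include <cstddef>
    #include <cstring>
    #include <cstdio>

    namespace dur { inline void* writingPtr(void *x, std::size_t) { return x; } }

    struct Record { int lengthWithHeaders; char data[32]; };

    int main() {
        Record r;
        r.lengthWithHeaders = (int) sizeof(r);
        // zero the record through one declared byte range, as deleteRecord does
        memset(dur::writingPtr(&r, r.lengthWithHeaders), 0, r.lengthWithHeaders);
        printf("zeroed %d bytes\n", r.lengthWithHeaders);
        return 0;
    }
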
diff --git a/db/pdfile.h b/db/pdfile.h
index edfc4422b39..1d4001658b8 100644
--- a/db/pdfile.h
+++ b/db/pdfile.h
@@ -69,9 +69,7 @@ namespace mongo {
*/
Extent* createExtent(const char *ns, int approxSize, bool capped = false, int loops = 0);
- DataFileHeader *getHeader() {
- return header;
- }
+ DataFileHeader *getHeader() { return header; }
/* return max size an extent may be */
static int maxSize();
@@ -292,10 +290,7 @@ namespace mongo {
return ( version == VERSION ) && ( versionMinor == VERSION_MINOR );
}
- bool uninitialized() const {
- if ( version == 0 ) return true;
- return false;
- }
+ bool uninitialized() const { return version == 0; }
/*Record* __getRecord(DiskLoc dl) {
int ofs = dl.getOfs();
@@ -307,12 +302,13 @@ namespace mongo {
if ( uninitialized() ) {
assert(filelength > 32768 );
assert( HeaderSize == 8192 );
- fileLength = filelength;
- version = VERSION;
- versionMinor = VERSION_MINOR;
- unused.set( fileno, HeaderSize );
+ DataFileHeader *h = dur::writing(this);
+ h->fileLength = filelength;
+ h->version = VERSION;
+ h->versionMinor = VERSION_MINOR;
+ h->unused.set( fileno, HeaderSize );
assert( (data-(char*)this) == HeaderSize );
- unusedLength = fileLength - HeaderSize - 16;
+ h->unusedLength = fileLength - HeaderSize - 16;
//memcpy(data+unusedLength, " \nthe end\n", 16);
}
}
diff --git a/db/query.cpp b/db/query.cpp
index 8b5d24ba1f9..8f63bf44d7b 100644
--- a/db/query.cpp
+++ b/db/query.cpp
@@ -525,7 +525,7 @@ namespace mongo {
// count of all objects
if ( query.isEmpty() ){
- return applySkipLimit( d->nrecords , cmd );
+ return applySkipLimit( d->stats.nrecords , cmd );
}
MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true );
CountOp original( ns , cmd );
diff --git a/db/rec.h b/db/rec.h
index 9785d4892b9..b6afa355667 100644
--- a/db/rec.h
+++ b/db/rec.h
@@ -51,7 +51,7 @@ public:
theDataFileMgr._deleteRecord(nsdetails_notinline(ns), ns, d.rec(), d);
}
- VIRT void modified(DiskLoc d) { }
+//goingaway VIRT void modified(DiskLoc d) { }
VIRT void drop(const char *ns) {
dropNS(ns);
@@ -127,11 +127,4 @@ inline BtreeBucket* DiskLoc::btree() const {
return (BtreeBucket*) btreeStore->get(*this, BucketSize);
}
-inline BtreeBucket* DiskLoc::btreemod() const {
- assert( _a != -1 );
- BtreeBucket *b = (BtreeBucket*) btreeStore->get(*this, BucketSize);
- btreeStore->modified(*this);
- return b;
-}
-
}
diff --git a/db/repl/manager.cpp b/db/repl/manager.cpp
index 328f6d279f9..c1a7c858d18 100644
--- a/db/repl/manager.cpp
+++ b/db/repl/manager.cpp
@@ -55,9 +55,12 @@ namespace mongo {
}
Manager::~Manager() {
- log() << "ERROR: ~Manager should never be called" << rsLog;
+ /* we don't destroy the replset object we sit in; however, the destructor could have thrown on init.
+ the log message below is just a reminder to come back one day and review this code more, and to
+ make it cleaner.
+ */
+ log() << "info: ~Manager called" << rsLog;
rs->mgr = 0;
- //assert(false);
}
void Manager::starting() {
diff --git a/db/repl/rs.h b/db/repl/rs.h
index 164d179d7a3..19f8e5e0ff3 100644
--- a/db/repl/rs.h
+++ b/db/repl/rs.h
@@ -75,7 +75,7 @@ namespace mongo {
virtual void starting();
public:
Manager(ReplSetImpl *rs);
- ~Manager();
+ virtual ~Manager();
void msgReceivedNewConfig(BSONObj);
void msgCheckNewState();
};
diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp
index 3851c66827d..a0579ac967e 100644
--- a/db/repl/rs_initialsync.cpp
+++ b/db/repl/rs_initialsync.cpp
@@ -74,7 +74,7 @@ namespace mongo {
NamespaceDetails *d = nsdetails(rsoplog);
// temp
- if( d && d->nrecords == 0 )
+ if( d && d->stats.nrecords == 0 )
return; // already empty, ok.
log(1) << "replSet empty oplog" << rsLog;
diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp
index aeb9b8b25e7..0fcaaece31f 100644
--- a/db/repl/rs_rollback.cpp
+++ b/db/repl/rs_rollback.cpp
@@ -524,7 +524,7 @@ namespace mongo {
}
}
// did we just empty the collection? if so let's check if it even exists on the source.
- if( nsd->nrecords == 0 ) {
+ if( nsd->stats.nrecords == 0 ) {
try {
string sys = cc().database()->name + ".system.namespaces";
bo o = them->findOne(sys, QUERY("name"<<d.ns));
diff --git a/dbtests/mmaptests.cpp b/dbtests/mmaptests.cpp
index dd60b2f9d53..f272b63dae4 100755
--- a/dbtests/mmaptests.cpp
+++ b/dbtests/mmaptests.cpp
@@ -1,5 +1,4 @@
-// socktests.cpp : sock.{h,cpp} unit tests.
-//
+// @file mmaptests.cpp
/**
* Copyright (C) 2008 10gen Inc.
diff --git a/dbtests/namespacetests.cpp b/dbtests/namespacetests.cpp
index ca051fe15f9..5588bf7c2cd 100644
--- a/dbtests/namespacetests.cpp
+++ b/dbtests/namespacetests.cpp
@@ -604,7 +604,7 @@ namespace NamespaceTests {
++count;
}
}
- ASSERT_EQUALS( count, nsd()->nrecords );
+ ASSERT_EQUALS( count, nsd()->stats.nrecords );
return count;
}
int nExtents() const {
@@ -620,7 +620,7 @@ namespace NamespaceTests {
return ns_;
}
NamespaceDetails *nsd() const {
- return nsdetails( ns() );
+ return dur::writing( nsdetails( ns() ) );
}
static BSONObj bigObj() {
string as( 187, 'a' );
@@ -737,9 +737,9 @@ namespace NamespaceTests {
}
DiskLoc d = l[6];
- long long n = nsd->nrecords;
+ long long n = nsd->stats.nrecords;
nsd->cappedTruncateAfter(ns(), d, false);
- ASSERT_EQUALS( nsd->nrecords , n-1 );
+ ASSERT_EQUALS( nsd->stats.nrecords , n-1 );
{
ForwardCappedCursor c(nsd);
@@ -770,7 +770,7 @@ namespace NamespaceTests {
void run() {
create();
nsd()->deletedList[ 2 ] = nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted;
- nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted = DiskLoc();
+ nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted.writing() = DiskLoc();
nsd()->cappedLastDelRecLastExtent().Null();
NamespaceDetails *d = nsd();
zero( &d->capExtent );
diff --git a/dbtests/test.vcxproj b/dbtests/test.vcxproj
index 8e0173c11d4..8b7e5b95fed 100644
--- a/dbtests/test.vcxproj
+++ b/dbtests/test.vcxproj
@@ -97,7 +97,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>..\..\js\src;..\pcre-7.4;C:\boost;\boost;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>_NOTDURABLE;_UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>_DURABLE;_UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>No</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -263,6 +263,7 @@
<ClCompile Include="..\client\model.cpp" />
<ClCompile Include="..\client\parallel.cpp" />
<ClCompile Include="..\db\cap.cpp" />
+ <ClCompile Include="..\db\dur.cpp" />
<ClCompile Include="..\db\geo\2d.cpp" />
<ClCompile Include="..\db\geo\haystack.cpp" />
<ClCompile Include="..\db\repl\consensus.cpp" />
diff --git a/dbtests/test.vcxproj.filters b/dbtests/test.vcxproj.filters
index 3302cbf79e6..16b8368ab7c 100755
--- a/dbtests/test.vcxproj.filters
+++ b/dbtests/test.vcxproj.filters
@@ -704,6 +704,9 @@
<ClCompile Include="..\scripting\bench.cpp">
<Filter>scripting</Filter>
</ClCompile>
+ <ClCompile Include="..\db\dur.cpp">
+ <Filter>db\cpp</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\SConstruct">
diff --git a/s/d_split.cpp b/s/d_split.cpp
index 208cf665899..5c569da391b 100644
--- a/s/d_split.cpp
+++ b/s/d_split.cpp
@@ -196,8 +196,8 @@ namespace mongo {
return false;
}
- const long long recCount = d->nrecords;
- const long long dataSize = d->datasize;
+ const long long recCount = d->stats.nrecords;
+ const long long dataSize = d->stats.datasize;
// If there's not enough data for more than one chunk, no point continuing.
if ( dataSize < maxChunkSize || recCount == 0 ) {
diff --git a/util/hashtab.h b/util/hashtab.h
index 16c5483eda6..6604864e65a 100644
--- a/util/hashtab.h
+++ b/util/hashtab.h
@@ -24,6 +24,7 @@
#include "../pch.h"
#include <map>
+#include "../db/dur.h"
namespace mongo {
@@ -127,35 +128,28 @@ namespace mongo {
bool found;
int i = _find(k, found);
if ( i >= 0 && found ) {
- Node& n = nodes(i);
- n.k.kill();
- n.setUnused();
+ Node* n = &nodes(i);
+ n = dur::writing(n);
+ n->k.kill();
+ n->setUnused();
}
}
-/*
- void drop(const Key& k) {
- bool found;
- int i = _find(k, found);
- if ( i >= 0 && found ) {
- nodes[i].setUnused();
- }
- }
-*/
+
/** returns false if too full */
bool put(const Key& k, const Type& value) {
bool found;
int i = _find(k, found);
if ( i < 0 )
return false;
- Node& n = nodes(i);
+ Node* n = dur::writing( &nodes(i) );
if ( !found ) {
- n.k = k;
- n.hash = k.hash();
+ n->k = k;
+ n->hash = k.hash();
}
else {
- assert( n.hash == k.hash() );
+ assert( n->hash == k.hash() );
}
- n.value = value;
+ n->value = value;
return true;
}
diff --git a/util/mmap.h b/util/mmap.h
index 826c8b07b82..eca6db811fb 100644
--- a/util/mmap.h
+++ b/util/mmap.h
@@ -136,10 +136,12 @@ namespace mongo {
};
MemoryMappedFile();
+
~MemoryMappedFile() {
- destroyed();
+ destroyed(); // cleans up from the master list of mmaps
close();
}
+
void close();
void* testGetCopyOnWriteView();
@@ -171,15 +173,15 @@ namespace mongo {
void flush(bool sync);
virtual Flushable * prepareFlush();
- /*void* viewOfs() {
- return view;
- }*/
-
long shortLength() const { return (long) len; }
unsigned long long length() const { return len; }
string filename() const { return _filename; }
+#if defined(_DURABLE) && defined(_DEBUG)
+ static void* getWriteViewFor(void *ptr);
+#endif
+
private:
static void updateLength( const char *filename, unsigned long long &length );
@@ -192,7 +194,9 @@ namespace mongo {
#ifdef _WIN32
boost::shared_ptr<mutex> _flushMutex;
#endif
-
+#if defined(_DURABLE)
+ void *writeView;
+#endif
protected:
// only posix mmap implementations will support this
virtual void _lock();
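
Note: mmap_win.cpp (next) maps each file twice under _DURABLE - the primary view
is opened read-only and handed to the rest of the server, while writeView stays
private - and getWriteViewFor() translates a read-only pointer by its offset.
A sketch of that arithmetic, with heap buffers standing in for the two
MapViewOfFile views of one file:

    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    struct MappedFile {
        char *view;      // read-only view handed out to callers
        char *writeView; // writable view of the same bytes
        std::size_t len;
        void* getWriteViewFor(void *p) {
            std::size_t ofs = (char*)p - view; // offset within the RO view
            assert( ofs < len );               // p must point into this mapping
            return writeView + ofs;            // same offset, writable view
        }
    };

    int main() {
        static char ro[4096], w[4096];
        MappedFile mf = { ro, w, sizeof(ro) };
        void *p = ro + 100;
        void *wp = mf.getWriteViewFor(p);
        assert( wp == w + 100 );
        printf("ro %p -> w %p\n", p, wp);
        return 0;
    }
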
diff --git a/util/mmap_win.cpp b/util/mmap_win.cpp
index a21e7505eb8..5901804da67 100644
--- a/util/mmap_win.cpp
+++ b/util/mmap_win.cpp
@@ -22,8 +22,11 @@
namespace mongo {
+ static map<void *, MemoryMappedFile*> viewToWriteable;
+ static mutex viewToWriteableMutex("viewToWriteableMutex");
+
MemoryMappedFile::MemoryMappedFile()
- : _flushMutex(new mutex("flushMutex"))
+ : _flushMutex(new mutex("flushMutex")), _filename("??")
{
fd = 0;
maphandle = 0;
@@ -33,8 +36,17 @@ namespace mongo {
}
void MemoryMappedFile::close() {
- if ( view )
+ //log() << "dur mmap close " << filename() << endl;
+ if ( view ) {
+ {
+ mutex::scoped_lock lk(viewToWriteableMutex);
+ viewToWriteable.erase(view);
+ }
UnmapViewOfFile(view);
+#if defined(_DURABLE)
+ UnmapViewOfFile(writeView);
+#endif
+ }
view = 0;
if ( maphandle )
CloseHandle(maphandle);
@@ -61,6 +73,9 @@ namespace mongo {
}
void* MemoryMappedFile::map(const char *filenameIn, unsigned long long &length, int options) {
+#if defined(_DURABLE)
+ options |= READONLY;
+#endif
_filename = filenameIn;
/* big hack here: Babble uses db names with colons. doesn't seem to work on windows. temporary perhaps. */
char filename[256];
@@ -80,37 +95,38 @@ namespace mongo {
updateLength( filename, length );
- DWORD createOptions = FILE_ATTRIBUTE_NORMAL;
- if ( options & SEQUENTIAL )
- createOptions |= FILE_FLAG_SEQUENTIAL_SCAN;
- DWORD rw = GENERIC_READ | GENERIC_WRITE;
- //if ( options & READONLY )
- // rw = GENERIC_READ;
-
- fd = CreateFile(
- toNativeString(filename).c_str(),
- rw, // desired access
- FILE_SHARE_READ, // share mode
- NULL, // security
- OPEN_ALWAYS, // create disposition
- createOptions , // flags
- NULL); // hTempl
- if ( fd == INVALID_HANDLE_VALUE ) {
- log() << "Create/OpenFile failed " << filename << ' ' << GetLastError() << endl;
- return 0;
+ {
+ DWORD createOptions = FILE_ATTRIBUTE_NORMAL;
+ if ( options & SEQUENTIAL )
+ createOptions |= FILE_FLAG_SEQUENTIAL_SCAN;
+ DWORD rw = GENERIC_READ | GENERIC_WRITE;
+ fd = CreateFile(
+ toNativeString(filename).c_str(),
+ rw, // desired access
+ FILE_SHARE_READ, // share mode
+ NULL, // security
+ OPEN_ALWAYS, // create disposition
+ createOptions , // flags
+ NULL); // hTempl
+ if ( fd == INVALID_HANDLE_VALUE ) {
+ log() << "Create/OpenFile failed " << filename << ' ' << GetLastError() << endl;
+ return 0;
+ }
}
mapped += length;
- DWORD flProtect = PAGE_READWRITE; //(options & READONLY)?PAGE_READONLY:PAGE_READWRITE;
- maphandle = CreateFileMapping(fd, NULL, flProtect,
- length >> 32 /*maxsizehigh*/,
- (unsigned) length /*maxsizelow*/,
- NULL/*lpName*/);
- if ( maphandle == NULL ) {
- DWORD e = GetLastError(); // log() call was killing lasterror before we get to that point in the stream
- log() << "CreateFileMapping failed " << filename << ' ' << errnoWithDescription(e) << endl;
- return 0;
+ {
+ DWORD flProtect = PAGE_READWRITE; //(options & READONLY)?PAGE_READONLY:PAGE_READWRITE;
+ maphandle = CreateFileMapping(fd, NULL, flProtect,
+ length >> 32 /*maxsizehigh*/,
+ (unsigned) length /*maxsizelow*/,
+ NULL/*lpName*/);
+ if ( maphandle == NULL ) {
+ DWORD e = GetLastError(); // log() call was killing lasterror before we get to that point in the stream
+ log() << "CreateFileMapping failed " << filename << ' ' << errnoWithDescription(e) << endl;
+ return 0;
+ }
}
{
@@ -122,9 +138,58 @@ namespace mongo {
log() << "MapViewOfFile failed " << filename << " " << errnoWithDescription(e) << endl;
}
len = length;
+
+#if defined(_DURABLE)
+ {
+ if( !( options & READONLY ) ) {
+ log() << "dur: not readonly view which is wrong : " << filename << endl;
+ }
+ void *p = MapViewOfFile(maphandle, FILE_MAP_ALL_ACCESS, /*f ofs hi*/0, /*f ofs lo*/ 0, /*dwNumberOfBytesToMap 0 means to eof*/0);
+ assert( p );
+ writeView = p;
+ {
+ mutex::scoped_lock lk(viewToWriteableMutex);
+ viewToWriteable[view] = this;
+ }
+ log() << filenameIn << endl;
+ log() << " ro: " << view << " - " << (void*) (((char *)view)+length) << endl;
+ log() << " w : " << writeView << " - " << (void*) (((char *)writeView)+length) << endl;
+ }
+#endif
+
return view;
}
+#if defined(_DURABLE) && defined(_DEBUG)
+ void* MemoryMappedFile::getWriteViewFor(void *p) {
+ mutex::scoped_lock lk(viewToWriteableMutex);
+ std::map< void*, MemoryMappedFile* >::iterator i =
+ viewToWriteable.upper_bound(((char *)p)+1);
+ i--;
+ assert( i != viewToWriteable.end() );
+ MemoryMappedFile *mmf = i->second;
+ assert( mmf );
+
+ size_t ofs = ((char *)p) - ((char*)mmf->view);
+
+ if( ofs >= mmf->len ) {
+ log() << "getWriteViewFor error? " << p << endl;
+ for( std::map<void*,MemoryMappedFile*>::iterator i = viewToWriteable.begin(); i != viewToWriteable.end(); i++ ) {
+ char *wl = (char *) i->second->writeView;
+ char *wh = wl + i->second->length();
+ if( p >= wl && p < wh ) {
+ log() << "dur ERROR p " << p << " is already in the writable view of " << i->second->filename() << endl;
+ //wassert(false);
+ // could do this:
+ return p;
+ }
+ }
+ assert( ofs < mmf->len ); // did you call writing() with a pointer that isn't into a datafile?
+ }
+ return ((char *)mmf->writeView) + ofs;
+ }
+#endif
+
class WindowsFlushable : public MemoryMappedFile::Flushable {
public:
WindowsFlushable( void * view , HANDLE fd , string filename , boost::shared_ptr<mutex> flushMutex )