diff options
author | Dwight <dwight@10gen.com> | 2010-07-21 15:39:59 -0400 |
---|---|---|
committer | Dwight <dwight@10gen.com> | 2010-07-21 15:39:59 -0400 |
commit | 3b2ede3771e29adf3359086980220bf79a8b3f7a (patch) | |
tree | 1de0f2f4b5792a053c0e09e6d5af4b50f0d38d0c | |
parent | ac0125de71602f408dd9c8ea0ae05b26dc73b770 (diff) | |
download | mongo-3b2ede3771e29adf3359086980220bf79a8b3f7a.tar.gz |
rs rollback work
-rw-r--r-- | bson/bsonelement.h | 2 | ||||
-rw-r--r-- | db/repl/rs_initialsync.cpp | 5 | ||||
-rw-r--r-- | db/repl/rs_rollback.cpp | 70 | ||||
-rw-r--r-- | db/repl/rs_sync.cpp | 30 | ||||
-rw-r--r-- | util/optime.h | 3 |
5 files changed, 83 insertions, 27 deletions
diff --git a/bson/bsonelement.h b/bson/bsonelement.h index f9740f6b616..4a0449a5a63 100644 --- a/bson/bsonelement.h +++ b/bson/bsonelement.h @@ -288,7 +288,6 @@ public: return woCompare( r , true ) == 0; } - /** Well ordered comparison. @return <0: l<r. 0:l==r. >0:l>r order by type, field name, and field value. @@ -545,5 +544,4 @@ private: totalSize = 1; } - } diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp index dce91a0e17e..0e05972a4c1 100644 --- a/db/repl/rs_initialsync.cpp +++ b/db/repl/rs_initialsync.cpp @@ -51,11 +51,6 @@ namespace mongo { } } - static bool stillHave(OplogReader& r, OpTime t, long long h) { - cout << "not yet implemented" << endl; - return false; - } - bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, bool slaveOk, bool useReplAuth, bool snapshot); diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp index ea1ccf7353f..84fb90e262d 100644 --- a/db/repl/rs_rollback.cpp +++ b/db/repl/rs_rollback.cpp @@ -61,8 +61,12 @@ namespace mongo { using namespace bson; - static void syncRollbackFindCommonPoint(DBClientConnection *us, DBClientConnection *them) { - throw "test"; + struct HowToFixUp { + list<bo> toRefetch; + OpTime commonPoint; + }; + + static void syncRollbackFindCommonPoint(DBClientConnection *us, DBClientConnection *them, HowToFixUp& h) { const Query q = Query().sort( BSON( "$natural" << -1 ) ); const bo fields = BSON( "ts" << 1 << "h" << 1 ); @@ -73,19 +77,55 @@ namespace mongo { if( !t->more() ) throw "remote oplog empty or unreadable"; BSONObj ourObj = u->nextSafe(); + OpTime ourTime = ourObj["ts"]._opTime(); BSONObj theirObj = t->nextSafe(); + OpTime theirTime = theirObj["ts"]._opTime(); - { - OpTime ourTime = ourObj["ts"]._opTime(); - OpTime theirTime = theirObj["ts"]._opTime(); + if( 1 ) { long long diff = (long long) ourTime.getSecs() - ((long long) theirTime.getSecs()); /* diff could be positive, negative, or zero */ - log() << "replSet syncRollback diff in end of log times : " << diff << " seconds" << rsLog; -// if( + log() << "replSet TEMP info syncRollback diff in end of log times : " << diff << " seconds" << rsLog; + /*if( diff > 3600 ) { + log() << "replSet syncRollback too long a time period for a rollback. sleeping 1 minute" << rsLog; + sleepsecs(60); + throw "error not willing to roll back more than one hour of data"; + }*/ } - if( 0 ) while( 1 ) { - + unsigned long long totSize = 0; + unsigned long long scanned = 0; + while( 1 ) { + scanned++; + /* todo add code to assure no excessive scanning for too long */ + if( ourTime == theirTime ) { + if( ourObj["h"].Long() == theirObj["h"].Long() ) { + // found the point back in time where we match. + // todo : check a few more just to be careful about hash collisions. + log() << "replSet rollback found matching events at " << ourTime.toStringPretty() << rsLog; + log() << "replSet scanned : " << scanned << rsLog; + log() << "replSet TODO finish " << rsLog; + h.commonPoint = ourTime; + return; + } + theirObj = t->nextSafe(); + theirTime = theirObj["ts"]._opTime(); + ourObj = u->nextSafe(); + ourTime = ourObj["ts"]._opTime(); + } + else if( theirTime > ourTime ) { + /* todo: we could hit beginning of log here. exception thrown is ok but not descriptive, so fix up */ + theirObj = t->nextSafe(); + theirTime = theirObj["ts"]._opTime(); + } + else { + // theirTime < ourTime + totSize += ourObj.objsize(); + if( totSize > 512 * 1024 * 1024 ) + throw "rollback too large"; + h.toRefetch.push_back( ourObj.getOwned() ); + ourObj = u->nextSafe(); + ourTime = ourObj["ts"]._opTime(); + } } } @@ -93,6 +133,7 @@ namespace mongo { assert( !lockedByMe() ); assert( !dbMutex.atLeastReadLocked() ); + HowToFixUp how; sethbmsg("syncRollback 1"); { r.resetCursor(); @@ -102,17 +143,24 @@ namespace mongo { sethbmsg("syncRollback connect to self failure" + errmsg); return; } + sethbmsg("syncRollback 2 FindCommonPoint"); try { - syncRollbackFindCommonPoint(&us, r.conn()); + syncRollbackFindCommonPoint(&us, r.conn(), how); } catch( const char *p ) { sethbmsg(string("syncRollback 2 error ") + p); + sleepsecs(10); return; } + catch( DBException& e ) { + sethbmsg(string("syncRollback 2 exception ") + e.toString() + "; sleeping 1 min"); + sleepsecs(60); + throw; + } } - + sethbmsg("replSet syncRollback 3 FINISH"); } } diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index 3c538484658..dc8f10a7135 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -59,6 +59,23 @@ namespace mongo { return; } + /* first make sure we are not hopelessly out of sync by being very stale. */ + { + BSONObj remoteOldestOp = r.findOne(rsoplog, Query()); + OpTime ts = remoteOldestOp["ts"]._opTime(); + DEV log() << "remoteOldestOp: " << ts.toStringPretty() << endl; + else log(3) << "remoteOldestOp: " << ts.toStringPretty() << endl; + if( lastOpTimeWritten < ts ) { + log() << "replSet error too stale to catch up, at least from primary " << hn << rsLog; + log() << "replSet our last optime : " << lastOpTimeWritten.toStringPretty() << rsLog; + log() << "replSet oldest at " << hn << " : " << ts.toStringPretty() << rsLog; + log() << "replSet See http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member" << rsLog; + sethbmsg("error too stale to catch up"); + sleepsecs(120); + return; + } + } + r.tailingQueryGTE(rsoplog, lastOpTimeWritten); assert( r.haveCursor() ); assert( r.awaitCapable() ); @@ -68,15 +85,10 @@ namespace mongo { OpTime ts = o["ts"]._opTime(); long long h = o["h"].numberLong(); if( ts != lastOpTimeWritten || h != lastH ) { - if( lastOpTimeWritten < ts ) { - log() << "replSet error too stale to catch up, at least from primary " << hn << rsLog; - log() << "replSet our last optime : " << lastOpTimeWritten.toStringPretty() << rsLog; - log() << "replSet oldest at " << hn << " : " << ts.toStringPretty() << rsLog; - log() << "replSet See http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member" << rsLog; - sethbmsg("error too stale to catch up"); - sleepsecs(120); - return; - } + log() << "TEMP " << lastOpTimeWritten.toStringPretty() << endl; + log() << "TEMP " << ts.toStringPretty() << endl; + /* + }*/ syncRollback(r); return; diff --git a/util/optime.h b/util/optime.h index 4321c13c5fa..1483c9ac9c1 100644 --- a/util/optime.h +++ b/util/optime.h @@ -136,6 +136,9 @@ namespace mongo { bool operator<=(const OpTime& r) const { return *this < r || *this == r; } + bool operator>(const OpTime& r) const { + return !(*this <= r); + } }; #pragma pack() |