summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dwight <dwight@10gen.com> 2010-07-21 15:39:59 -0400
committer: Dwight <dwight@10gen.com> 2010-07-21 15:39:59 -0400
commit: 3b2ede3771e29adf3359086980220bf79a8b3f7a (patch)
tree: 1de0f2f4b5792a053c0e09e6d5af4b50f0d38d0c
parent: ac0125de71602f408dd9c8ea0ae05b26dc73b770 (diff)
download: mongo-3b2ede3771e29adf3359086980220bf79a8b3f7a.tar.gz
rs rollback work
-rw-r--r-- bson/bsonelement.h 2
-rw-r--r-- db/repl/rs_initialsync.cpp 5
-rw-r--r-- db/repl/rs_rollback.cpp 70
-rw-r--r-- db/repl/rs_sync.cpp 30
-rw-r--r-- util/optime.h 3
5 files changed, 83 insertions, 27 deletions
diff --git a/bson/bsonelement.h b/bson/bsonelement.h
index f9740f6b616..4a0449a5a63 100644
--- a/bson/bsonelement.h
+++ b/bson/bsonelement.h
@@ -288,7 +288,6 @@ public:
return woCompare( r , true ) == 0;
}
-
/** Well ordered comparison.
@return <0: l<r. 0:l==r. >0:l>r
order by type, field name, and field value.
@@ -545,5 +544,4 @@ private:
totalSize = 1;
}
-
}
diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp
index dce91a0e17e..0e05972a4c1 100644
--- a/db/repl/rs_initialsync.cpp
+++ b/db/repl/rs_initialsync.cpp
@@ -51,11 +51,6 @@ namespace mongo {
}
}
- static bool stillHave(OplogReader& r, OpTime t, long long h) {
- cout << "not yet implemented" << endl;
- return false;
- }
-
bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication,
bool slaveOk, bool useReplAuth, bool snapshot);
diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp
index ea1ccf7353f..84fb90e262d 100644
--- a/db/repl/rs_rollback.cpp
+++ b/db/repl/rs_rollback.cpp
@@ -61,8 +61,12 @@ namespace mongo {
using namespace bson;
- static void syncRollbackFindCommonPoint(DBClientConnection *us, DBClientConnection *them) {
- throw "test";
+ struct HowToFixUp {
+ list<bo> toRefetch;
+ OpTime commonPoint;
+ };
+
+ static void syncRollbackFindCommonPoint(DBClientConnection *us, DBClientConnection *them, HowToFixUp& h) {
const Query q = Query().sort( BSON( "$natural" << -1 ) );
const bo fields = BSON( "ts" << 1 << "h" << 1 );
@@ -73,19 +77,55 @@ namespace mongo {
if( !t->more() ) throw "remote oplog empty or unreadable";
BSONObj ourObj = u->nextSafe();
+ OpTime ourTime = ourObj["ts"]._opTime();
BSONObj theirObj = t->nextSafe();
+ OpTime theirTime = theirObj["ts"]._opTime();
- {
- OpTime ourTime = ourObj["ts"]._opTime();
- OpTime theirTime = theirObj["ts"]._opTime();
+ if( 1 ) {
long long diff = (long long) ourTime.getSecs() - ((long long) theirTime.getSecs());
/* diff could be positive, negative, or zero */
- log() << "replSet syncRollback diff in end of log times : " << diff << " seconds" << rsLog;
-// if(
+ log() << "replSet TEMP info syncRollback diff in end of log times : " << diff << " seconds" << rsLog;
+ /*if( diff > 3600 ) {
+ log() << "replSet syncRollback too long a time period for a rollback. sleeping 1 minute" << rsLog;
+ sleepsecs(60);
+ throw "error not willing to roll back more than one hour of data";
+ }*/
}
- if( 0 ) while( 1 ) {
-
+ unsigned long long totSize = 0;
+ unsigned long long scanned = 0;
+ while( 1 ) {
+ scanned++;
+ /* todo add code to assure no excessive scanning for too long */
+ if( ourTime == theirTime ) {
+ if( ourObj["h"].Long() == theirObj["h"].Long() ) {
+ // found the point back in time where we match.
+ // todo : check a few more just to be careful about hash collisions.
+ log() << "replSet rollback found matching events at " << ourTime.toStringPretty() << rsLog;
+ log() << "replSet scanned : " << scanned << rsLog;
+ log() << "replSet TODO finish " << rsLog;
+ h.commonPoint = ourTime;
+ return;
+ }
+ theirObj = t->nextSafe();
+ theirTime = theirObj["ts"]._opTime();
+ ourObj = u->nextSafe();
+ ourTime = ourObj["ts"]._opTime();
+ }
+ else if( theirTime > ourTime ) {
+ /* todo: we could hit beginning of log here. exception thrown is ok but not descriptive, so fix up */
+ theirObj = t->nextSafe();
+ theirTime = theirObj["ts"]._opTime();
+ }
+ else {
+ // theirTime < ourTime
+ totSize += ourObj.objsize();
+ if( totSize > 512 * 1024 * 1024 )
+ throw "rollback too large";
+ h.toRefetch.push_back( ourObj.getOwned() );
+ ourObj = u->nextSafe();
+ ourTime = ourObj["ts"]._opTime();
+ }
}
}
@@ -93,6 +133,7 @@ namespace mongo {
assert( !lockedByMe() );
assert( !dbMutex.atLeastReadLocked() );
+ HowToFixUp how;
sethbmsg("syncRollback 1");
{
r.resetCursor();
@@ -102,17 +143,24 @@ namespace mongo {
sethbmsg("syncRollback connect to self failure" + errmsg);
return;
}
+
sethbmsg("syncRollback 2 FindCommonPoint");
try {
- syncRollbackFindCommonPoint(&us, r.conn());
+ syncRollbackFindCommonPoint(&us, r.conn(), how);
}
catch( const char *p ) {
sethbmsg(string("syncRollback 2 error ") + p);
+ sleepsecs(10);
return;
}
+ catch( DBException& e ) {
+ sethbmsg(string("syncRollback 2 exception ") + e.toString() + "; sleeping 1 min");
+ sleepsecs(60);
+ throw;
+ }
}
-
+ sethbmsg("replSet syncRollback 3 FINISH");
}
}
diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp
index 3c538484658..dc8f10a7135 100644
--- a/db/repl/rs_sync.cpp
+++ b/db/repl/rs_sync.cpp
@@ -59,6 +59,23 @@ namespace mongo {
return;
}
+ /* first make sure we are not hopelessly out of sync by being very stale. */
+ {
+ BSONObj remoteOldestOp = r.findOne(rsoplog, Query());
+ OpTime ts = remoteOldestOp["ts"]._opTime();
+ DEV log() << "remoteOldestOp: " << ts.toStringPretty() << endl;
+ else log(3) << "remoteOldestOp: " << ts.toStringPretty() << endl;
+ if( lastOpTimeWritten < ts ) {
+ log() << "replSet error too stale to catch up, at least from primary " << hn << rsLog;
+ log() << "replSet our last optime : " << lastOpTimeWritten.toStringPretty() << rsLog;
+ log() << "replSet oldest at " << hn << " : " << ts.toStringPretty() << rsLog;
+ log() << "replSet See http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member" << rsLog;
+ sethbmsg("error too stale to catch up");
+ sleepsecs(120);
+ return;
+ }
+ }
+
r.tailingQueryGTE(rsoplog, lastOpTimeWritten);
assert( r.haveCursor() );
assert( r.awaitCapable() );
@@ -68,15 +85,10 @@ namespace mongo {
OpTime ts = o["ts"]._opTime();
long long h = o["h"].numberLong();
if( ts != lastOpTimeWritten || h != lastH ) {
- if( lastOpTimeWritten < ts ) {
- log() << "replSet error too stale to catch up, at least from primary " << hn << rsLog;
- log() << "replSet our last optime : " << lastOpTimeWritten.toStringPretty() << rsLog;
- log() << "replSet oldest at " << hn << " : " << ts.toStringPretty() << rsLog;
- log() << "replSet See http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member" << rsLog;
- sethbmsg("error too stale to catch up");
- sleepsecs(120);
- return;
- }
+ log() << "TEMP " << lastOpTimeWritten.toStringPretty() << endl;
+ log() << "TEMP " << ts.toStringPretty() << endl;
+ /*
+ }*/
syncRollback(r);
return;
diff --git a/util/optime.h b/util/optime.h
index 4321c13c5fa..1483c9ac9c1 100644
--- a/util/optime.h
+++ b/util/optime.h
@@ -136,6 +136,9 @@ namespace mongo {
bool operator<=(const OpTime& r) const {
return *this < r || *this == r;
}
+ bool operator>(const OpTime& r) const {
+ return !(*this <= r);
+ }
};
#pragma pack()