diff options
author | Eric Milkie <milkie@10gen.com> | 2012-05-18 10:33:40 -0400 |
---|---|---|
committer | Eric Milkie <milkie@10gen.com> | 2012-05-18 16:48:34 -0400 |
commit | 6b0869166832d3c8f540e451e4b6bf21c1d876f8 (patch) | |
tree | e283fc4960940aae904983c8aced4de7aba0a32d | |
parent | c3d0639c9a6939a279b8ced3d4c210aef1979d2a (diff) | |
download | mongo-6b0869166832d3c8f540e451e4b6bf21c1d876f8.tar.gz |
SERVER-5040 retry initial sync if errors occur when creating indexes
If a document moves forward in memory (because of an update) while its database is being cloned, the cloner might clone both the old and the new copy of the document.
When this happens, creating a unique index might fail. This change restarts the clone in that case and aborts after 3 failed cloning attempts.
-rw-r--r-- | db/cloner.cpp | 6 | ||||
-rw-r--r-- | db/repl/rs_initialsync.cpp | 12 | ||||
-rw-r--r-- | jstests/replsets/replset7.js | 46 |
3 files changed, 59 insertions, 5 deletions
diff --git a/db/cloner.cpp b/db/cloner.cpp index 26c2f74b6df..6d5b095d1df 100644 --- a/db/cloner.cpp +++ b/db/cloner.cpp @@ -169,7 +169,8 @@ namespace mongo { getDur().commitIfNeeded(); } catch( UserException& e ) { - log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; + error() << "error: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; + throw; } RARELY if ( time( 0 ) - saveLast > 60 ) { @@ -238,7 +239,8 @@ namespace mongo { getDur().commitIfNeeded(); } catch( UserException& e ) { - log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; + error() << "error: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; + throw; } } } diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp index 1677bfc38c6..56819657682 100644 --- a/db/repl/rs_initialsync.cpp +++ b/db/repl/rs_initialsync.cpp @@ -43,18 +43,24 @@ namespace mongo { } void ReplSetImpl::syncDoInitialSync() { + const static int maxFailedAttempts = 3; createOplog(); - - while( 1 ) { + int failedAttempts = 0; + while ( failedAttempts < maxFailedAttempts ) { try { _syncDoInitialSync(); break; } catch(DBException& e) { - sethbmsg("initial sync exception " + e.toString(), 0); + failedAttempts++; + str::stream msg; + msg << "initial sync exception: "; + msg << e.toString() << " " << (maxFailedAttempts - failedAttempts) << " attempts remaining" ; + sethbmsg(msg, 0); sleepsecs(30); } } + fassert( 16233, failedAttempts < maxFailedAttempts); } /* todo : progress metering to sethbmsg. */ diff --git a/jstests/replsets/replset7.js b/jstests/replsets/replset7.js new file mode 100644 index 00000000000..f29c1fbb953 --- /dev/null +++ b/jstests/replsets/replset7.js @@ -0,0 +1,46 @@ + +// test for SERVER-5040 - if documents move forward during an initial sync. 
+ +var rt = new ReplSetTest( { name : "replset7tests" , nodes: 1 } ); + +var nodes = rt.startSet(); +rt.initiate(); +var master = rt.getMaster(); + +var md = master.getDB( 'd' ); +var mdc = md[ 'c' ]; + +// prep the data +var doccount = 100000; +for( i = 0; i < doccount; ++i ) { + mdc.insert( { _id:i, x:i } ); +} +md.getLastError(); + +mdc.ensureIndex( { x : 1 }, { unique: true } ); +md.getLastError(); + +// add a secondary +var slave = rt.add(); +rt.reInitiate(); +print ("initiation complete!"); +var sc = slave.getDB( 'd' )[ 'c' ]; +slave.setSlaveOk(); + +// Wait for slave to start cloning. +//assert.soon( function() { c = sc.find( { _id:1, x:1 } ); print( c ); return c > 0; } ); + + +// Move all documents to the end by growing it +for (i = 0; i < doccount; ++i) { + mdc.remove( { _id:i, x:i } ); + mdc.insert( { _id:doccount+i, x:i, bigstring: "ayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayay" } ); + md.getLastError(); +} + +// Wait for replication to catch up. +rt.awaitSecondaryNodes(); + +// Do we have an index? +assert.eq (1, slave.getDB( 'd' )['system.indexes'] + .find({"v" : 1,"key" : {"x" : 1},"unique" : true,"ns" : "d.c","name" : "x_1"}).count()); |