diff options
-rw-r--r-- | db/cloner.cpp | 24 | ||||
-rw-r--r-- | db/cloner.h | 6 | ||||
-rw-r--r-- | db/database.cpp | 35 | ||||
-rw-r--r-- | db/database.h | 7 | ||||
-rw-r--r-- | db/repl.cpp | 121 | ||||
-rw-r--r-- | db/repl.h | 26 | ||||
-rw-r--r-- | dbtests/repltests.cpp | 38 | ||||
-rw-r--r-- | jstests/dbcase.js | 10 | ||||
-rw-r--r-- | jstests/dropdb.js | 12 | ||||
-rw-r--r-- | jstests/repl/dbcase.js | 88 |
10 files changed, 344 insertions, 23 deletions
diff --git a/db/cloner.cpp b/db/cloner.cpp index 2fa58c33bec..fd86e10b787 100644 --- a/db/cloner.cpp +++ b/db/cloner.cpp @@ -62,7 +62,7 @@ namespace mongo { for example repairDatabase need not use it. */ void setConnection( DBClientWithCommands *c ) { conn.reset( c ); } - bool go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, bool mayBeInterrupted); + bool go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, bool mayBeInterrupted, int *errCode = 0); bool copyCollection( const string& from , const string& ns , const BSONObj& query , string& errmsg , bool mayYield, bool mayBeInterrupted, bool copyIndexes = true, bool logForRepl = true ); }; @@ -272,8 +272,10 @@ namespace mongo { extern bool inDBRepair; void ensureIdIndexForNewNs(const char *ns); - bool Cloner::go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, bool mayBeInterrupted) { - + bool Cloner::go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, bool mayBeInterrupted, int *errCode) { + if ( errCode ) { + *errCode = 0; + } massert( 10289 , "useReplAuth is not written to replication log", !useReplAuth || !logForRepl ); string todb = cc().database()->name; @@ -324,6 +326,18 @@ namespace mongo { errmsg = "query failed " + ns; return false; } + + if ( c->more() ) { + BSONObj first = c->next(); + if ( first.hasField("$err") ) { + if ( errCode ) { + *errCode = first.getIntField("code"); + } + errmsg = "query failed " + ns; + return false; + } + c->putBack( first ); + } while ( c->more() ) { BSONObj collection = c->next(); @@ -414,9 +428,9 @@ namespace mongo { } bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool 
logForReplication, - bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, bool mayBeInterrupted) { + bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, bool mayBeInterrupted, int *errCode) { Cloner c; - return c.go(masterHost, errmsg, fromdb, logForReplication, slaveOk, useReplAuth, snapshot, mayYield, mayBeInterrupted); + return c.go(masterHost, errmsg, fromdb, logForReplication, slaveOk, useReplAuth, snapshot, mayYield, mayBeInterrupted, errCode); } /* Usage: diff --git a/db/cloner.h b/db/cloner.h index 3e6041218a5..94264f85e58 100644 --- a/db/cloner.h +++ b/db/cloner.h @@ -16,6 +16,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#pragma once + #include "jsobj.h" namespace mongo { @@ -25,10 +27,12 @@ namespace mongo { * @param useReplAuth - use the credentials we normally use as a replication slave for the cloning * @param snapshot - use $snapshot mode for copying collections. note this should not be used when it isn't required, as it will be slower. * for example repairDatabase need not use it. + * @param errCode - If provided, this will be set on error to the server's error code. Currently + * this will only be set if there is an error in the initial system.namespaces query. 
*/ bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, - bool mayBeInterrupted); + bool mayBeInterrupted, int *errCode = 0); bool copyCollectionFromRemote(const string& host, const string& ns, const BSONObj& query, string& errmsg, bool logForRepl, bool mayYield, bool mayBeInterrupted); diff --git a/db/database.cpp b/db/database.cpp index 3372c6eb0d3..83df344ac04 100644 --- a/db/database.cpp +++ b/db/database.cpp @@ -76,28 +76,47 @@ namespace mongo { } void Database::checkDuplicateUncasedNames() const { + string duplicate = duplicateUncasedName( name, path ); + if ( !duplicate.empty() ) { + stringstream ss; + ss << "db already exists with different case other: [" << duplicate << "] me [" << name << "]"; + uasserted( DatabaseDifferCaseCode , ss.str() ); + } + } + + string Database::duplicateUncasedName( const string &name, const string &path, set< string > *duplicates ) { + if ( duplicates ) { + duplicates->clear(); + } + vector<string> others; getDatabaseNames( others , path ); - + set<string> allShortNames; dbHolder.getAllShortNames( allShortNames ); others.insert( others.end(), allShortNames.begin(), allShortNames.end() ); for ( unsigned i=0; i<others.size(); i++ ) { - + if ( strcasecmp( others[i].c_str() , name.c_str() ) ) continue; if ( strcmp( others[i].c_str() , name.c_str() ) == 0 ) continue; - - stringstream ss; - ss << "db already exists with different case other: [" << others[i] << "] me [" << name << "]"; - uasserted( DatabaseDifferCaseCode , ss.str() ); - } - } + if ( duplicates ) { + duplicates->insert( others[i] ); + } else { + return others[i]; + } + } + if ( duplicates ) { + return duplicates->empty() ? "" : *duplicates->begin(); + } + return ""; + } + boost::filesystem::path Database::fileName( int n ) const { stringstream ss; ss << name << '.' 
<< n; diff --git a/db/database.h b/db/database.h index 1fdadabdcfc..4ef6d9b3863 100644 --- a/db/database.h +++ b/db/database.h @@ -112,6 +112,13 @@ namespace mongo { */ void checkDuplicateUncasedNames() const; + /** + * @return name of an existing database with same text name but different + * casing, if one exists. Otherwise the empty string is returned. If + * 'duplicates' is specified, it is filled with all duplicate names. + */ + static string duplicateUncasedName( const string &name, const string &path, set< string > *duplicates = 0 ); + public: // this should be private later vector<MongoDataFile*> files; diff --git a/db/repl.cpp b/db/repl.cpp index baf22637ac9..455df038672 100644 --- a/db/repl.cpp +++ b/db/repl.cpp @@ -477,22 +477,131 @@ namespace mongo { } /* grab initial copy of a database from the master */ - bool ReplSource::resync(string db) { + void ReplSource::resync(string db) { string dummyNs = resyncDrop( db.c_str(), "internal" ); Client::Context ctx( dummyNs ); { log() << "resync: cloning database " << db << " to get an initial copy" << endl; ReplInfo r("resync: cloning a database"); string errmsg; - bool ok = cloneFrom(hostName.c_str(), errmsg, cc().database()->name, false, /*slaveok*/ true, /*replauth*/ true, /*snapshot*/false, /*mayYield*/true, /*mayBeInterrupted*/false); + int errCode = 0; + bool ok = cloneFrom(hostName.c_str(), errmsg, cc().database()->name, false, /*slaveok*/ true, /*replauth*/ true, /*snapshot*/false, /*mayYield*/true, /*mayBeInterrupted*/false, &errCode); if ( !ok ) { - problem() << "resync of " << db << " from " << hostName << " failed " << errmsg << endl; - throw SyncException(); + if ( errCode == DatabaseDifferCaseCode ) { + resyncDrop( db.c_str(), "internal" ); + log() << "resync: database " << db << " not valid on the master due to a name conflict, dropping." 
<< endl; + return; + } + else { + problem() << "resync of " << db << " from " << hostName << " failed " << errmsg << endl; + throw SyncException(); + } } } log() << "resync: done with initial clone for db: " << db << endl; + return; + } + + DatabaseIgnorer ___databaseIgnorer; + + void DatabaseIgnorer::doIgnoreUntilAfter( const string &db, const OpTime &futureOplogTime ) { + if ( futureOplogTime > _ignores[ db ] ) { + _ignores[ db ] = futureOplogTime; + } + } + + bool DatabaseIgnorer::ignoreAt( const string &db, const OpTime &currentOplogTime ) { + if ( _ignores[ db ].isNull() ) { + return false; + } + if ( _ignores[ db ] >= currentOplogTime ) { + return true; + } else { + // The ignore state has expired, so clear it. + _ignores.erase( db ); + return false; + } + } + + bool ReplSource::handleDuplicateDbName( const BSONObj &op, const char *ns, const char *db ) { + if ( dbHolder.isLoaded( ns, dbpath ) ) { + // Database is already present. + return true; + } + if ( ___databaseIgnorer.ignoreAt( db, op.getField( "ts" ).date() ) ) { + // Database is ignored due to a previous indication that it is + // missing from master after optime "ts". + return false; + } + if ( Database::duplicateUncasedName( db, dbpath ).empty() ) { + // No duplicate database names are present. + return true; + } + + OpTime lastTime; + bool dbOk = false; + { + dbtemprelease release; + + // We always log an operation after executing it (never before), so + // a database list will always be valid as of an oplog entry generated + // before it was retrieved.
+ + BSONObj last = oplogReader.findOne( this->ns().c_str(), Query().sort( BSON( "$natural" << -1 ) ) ); + if ( !last.isEmpty() ) { + BSONElement ts = last.getField( "ts" ); + massert( 14032, "Invalid 'ts' in remote log", ts.type() == Date || ts.type() == Timestamp ); + lastTime = OpTime( ts.date() ); + } + + BSONObj info; + bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info ); + massert( 14029, "Unable to get database list", ok ); + BSONObjIterator i( info.getField( "databases" ).embeddedObject() ); + while( i.more() ) { + BSONElement e = i.next(); + + const char * name = e.embeddedObject().getField( "name" ).valuestr(); + if ( strcasecmp( name, db ) != 0 ) + continue; + + if ( strcmp( name, db ) == 0 ) { + // The db exists on master, still need to check that no conflicts exist there. + dbOk = true; + continue; + } + + // The master has a db name that conflicts with the requested name. + dbOk = false; + break; + } + } + + if ( !dbOk ) { + ___databaseIgnorer.doIgnoreUntilAfter( db, lastTime ); + incompleteCloneDbs.erase(db); + addDbNextPass.erase(db); + return false; + } + + // Check for duplicates again, since we released the lock above. + set< string > duplicates; + Database::duplicateUncasedName( db, dbpath, &duplicates ); + + // The database is present on the master and no conflicting databases + // are present on the master. Drop any local conflicts. 
+ for( set< string >::const_iterator i = duplicates.begin(); i != duplicates.end(); ++i ) { + ___databaseIgnorer.doIgnoreUntilAfter( *i, lastTime ); + incompleteCloneDbs.erase(*i); + addDbNextPass.erase(*i); + Client::Context ctx(*i); + dropDatabase(*i); + } + + massert( 14030, "Duplicate database names present after attempting to delete duplicates", + Database::duplicateUncasedName( db, dbpath ).empty() ); return true; } @@ -589,6 +698,10 @@ namespace mongo { throw SyncException(); } + if ( !handleDuplicateDbName( op, ns, clientName ) ) { + return; + } + Client::Context ctx( ns ); ctx.getClient()->curop()->reset(); diff --git a/db/repl.h b/db/repl.h index 7ea669d6b39..8c3f326f54a 100644 --- a/db/repl.h +++ b/db/repl.h @@ -86,7 +86,7 @@ namespace mongo { class ReplSource { auto_ptr<ThreadPool> tp; - bool resync(string db); + void resync(string db); /** @param alreadyLocked caller already put us in write lock if true */ void sync_pullOpLog_applyOperation(BSONObj& op, bool alreadyLocked); @@ -114,6 +114,14 @@ namespace mongo { string ns() const { return string( "local.oplog.$" ) + sourceName(); } unsigned _sleepAdviceTime; + /** + * If 'db' is a new database and its name would conflict with that of + * an existing database, synchronize these database names with the + * master. + * @return true iff an op with the specified ns may be applied. + */ + bool handleDuplicateDbName( const BSONObj &op, const char *ns, const char *db ); + public: OplogReader oplogReader; @@ -162,5 +170,21 @@ namespace mongo { bool anyReplEnabled(); void appendReplicationInfo( BSONObjBuilder& result , bool authed , int level = 0 ); + /** + * Helper class used to set and query an ignore state for a named database. + * The ignore state will expire after a specified OpTime. 
+ */ + class DatabaseIgnorer { + public: + /** Indicate that operations for 'db' should be ignored until after 'futureOplogTime' */ + void doIgnoreUntilAfter( const string &db, const OpTime &futureOplogTime ); + /** + * Query ignore state of 'db'; if 'currentOplogTime' is after the ignore + * limit, the ignore state will be cleared. + */ + bool ignoreAt( const string &db, const OpTime &currentOplogTime ); + private: + map< string, OpTime > _ignores; + }; } // namespace mongo diff --git a/dbtests/repltests.cpp b/dbtests/repltests.cpp index 4be8f7f655c..ecaacf74874 100644 --- a/dbtests/repltests.cpp +++ b/dbtests/repltests.cpp @@ -1012,7 +1012,43 @@ namespace ReplTests { ASSERT( !one( BSON( "_id" << 2 ) ).isEmpty() ); } }; + + class DatabaseIgnorerBasic { + public: + void run() { + DatabaseIgnorer d; + ASSERT( !d.ignoreAt( "a", OpTime( 4, 0 ) ) ); + d.doIgnoreUntilAfter( "a", OpTime( 5, 0 ) ); + ASSERT( d.ignoreAt( "a", OpTime( 4, 0 ) ) ); + ASSERT( !d.ignoreAt( "b", OpTime( 4, 0 ) ) ); + ASSERT( d.ignoreAt( "a", OpTime( 4, 10 ) ) ); + ASSERT( d.ignoreAt( "a", OpTime( 5, 0 ) ) ); + ASSERT( !d.ignoreAt( "a", OpTime( 5, 1 ) ) ); + // Ignore state is expired.
+ ASSERT( !d.ignoreAt( "a", OpTime( 4, 0 ) ) ); + } + }; + class DatabaseIgnorerUpdate { + public: + void run() { + DatabaseIgnorer d; + d.doIgnoreUntilAfter( "a", OpTime( 5, 0 ) ); + d.doIgnoreUntilAfter( "a", OpTime( 6, 0 ) ); + ASSERT( d.ignoreAt( "a", OpTime( 5, 5 ) ) ); + ASSERT( d.ignoreAt( "a", OpTime( 6, 0 ) ) ); + ASSERT( !d.ignoreAt( "a", OpTime( 6, 1 ) ) ); + + d.doIgnoreUntilAfter( "a", OpTime( 5, 0 ) ); + d.doIgnoreUntilAfter( "a", OpTime( 6, 0 ) ); + d.doIgnoreUntilAfter( "a", OpTime( 6, 0 ) ); + d.doIgnoreUntilAfter( "a", OpTime( 5, 0 ) ); + ASSERT( d.ignoreAt( "a", OpTime( 5, 5 ) ) ); + ASSERT( d.ignoreAt( "a", OpTime( 6, 0 ) ) ); + ASSERT( !d.ignoreAt( "a", OpTime( 6, 1 ) ) ); + } + }; + class All : public Suite { public: All() : Suite( "repl" ) { @@ -1065,6 +1101,8 @@ namespace ReplTests { add< Idempotence::RenameOverwrite >(); add< Idempotence::NoRename >(); add< DeleteOpIsIdBased >(); + add< DatabaseIgnorerBasic >(); + add< DatabaseIgnorerUpdate >(); } } myall; diff --git a/jstests/dbcase.js b/jstests/dbcase.js index 347031ff9be..44a184bafa3 100644 --- a/jstests/dbcase.js +++ b/jstests/dbcase.js @@ -1,6 +1,5 @@ +// Check db name duplication constraint SERVER-2111 -/* -TODO SERVER-2111 a = db.getSisterDB( "dbcasetest_dbnamea" ) b = db.getSisterDB( "dbcasetest_dbnameA" ) @@ -15,11 +14,14 @@ b.foo.save( { x : 1 } ) z = db.getLastErrorObj(); assert.eq( 13297 , z.code || 0 , "B : " + tojson(z) ) +assert.neq( -1, db.getMongo().getDBNames().indexOf( a.getName() ) ); +assert.eq( -1, db.getMongo().getDBNames().indexOf( b.getName() ) ); printjson( db.getMongo().getDBs().databases ); a.dropDatabase(); b.dropDatabase(); +// 'a' will be present as a read only db because we issued a command with it in the ns.
+assert.neq( -1, db.getMongo().getDBNames().indexOf( a.getName() ) ); +assert.eq( -1, db.getMongo().getDBNames().indexOf( b.getName() ) ); printjson( db.getMongo().getDBs().databases ); -*/ - diff --git a/jstests/dropdb.js b/jstests/dropdb.js new file mode 100644 index 00000000000..64a5b8184a8 --- /dev/null +++ b/jstests/dropdb.js @@ -0,0 +1,12 @@ +m = db.getMongo(); +baseName = "jstests_dropdb"; +ddb = db.getSisterDB( baseName ); + +ddb.c.save( {} ); +assert.neq( -1, m.getDBNames().indexOf( baseName ) ); + +ddb.dropDatabase(); +assert.eq( -1, m.getDBNames().indexOf( baseName ) ); + +ddb.dropDatabase(); +assert.eq( -1, m.getDBNames().indexOf( baseName ) ); diff --git a/jstests/repl/dbcase.js b/jstests/repl/dbcase.js new file mode 100644 index 00000000000..f3bc9d38072 --- /dev/null +++ b/jstests/repl/dbcase.js @@ -0,0 +1,88 @@ +// Test db case checking with replication SERVER-2111 + +baseName = "jstests_repl_dbcase"; + +rt = new ReplTest( baseName ); + +m = rt.start( true ); +s = rt.start( false ); + +n1 = "dbname"; +n2 = "dbNAme"; + +/** + * The value of n should be n1 or n2. Check that n is soon present while its + * opposite is not present. + */ +function check( n ) { + assert.soon( function() { + names = s.getDBNames(); + n1Idx = names.indexOf( n1 ); + n2Idx = names.indexOf( n2 ); + // Check n1 and n2 are not both present. + assert( n1Idx == -1 || n2Idx == -1 ); + // Return true if we matched expected n. + return -1 != names.indexOf( n ); + } ); +} + +/** Allow some time for additional operations to be processed by the slave. */ +function checkTwice( n ) { + check( n ); + // zzz is expected to be cloned after n1 and n2 because of its position in the alphabet. + m.getDB( "zzz" ).c.save( {} ); + assert.soon( function() { return s.getDB( "zzz" ).c.count(); } ) + check( n ); + m.getDB( "zzz" ).dropDatabase(); +} + +/** + * The slave may create in memory db names on the master matching old dbs it is + * attempting to clone. 
This function forces operation 'cmd' by deleting those + * in memory dbs if necessary. This function should only be called in cases where + * 'cmd' would succeed if not for the in memory dbs on master created by the slave. + */ +function force( cmd ) { + print( "cmd: " + cmd ); + eval( cmd ); + while( m1.getLastError() ) { + sleep( 100 ); + m1.dropDatabase(); + m2.dropDatabase(); + eval( cmd ); + } +} + +m1 = m.getDB( n1 ); +m2 = m.getDB( n2 ); + +m1.c.save( {} ); +m2.c.save( {} ); // will fail due to conflict +check( n1 ); + +m1.dropDatabase(); +force( "m2.c.save( {} );" ); // will now succeed +check( n2 ); + +m2.dropDatabase(); +force( "m1.c.save( {} );" ); +check( n1 ); + +for( i = 0; i < 5; ++i ) { + m1.dropDatabase(); + force( "m2.c.save( {} );" ); + m2.dropDatabase(); + force( "m1.c.save( {} );" ); +} +checkTwice( n1 ); + +m1.dropDatabase(); +force( "m2.c.save( {} );" ); + +for( i = 0; i < 5; ++i ) { + m2.dropDatabase(); + force( "m1.c.save( {} );" ); + m1.dropDatabase(); + force( "m2.c.save( {} );" ); +} +checkTwice( n2 ); |