diff options
author | Greg Studer <greg@10gen.com> | 2014-01-13 18:50:02 -0500 |
---|---|---|
committer | Greg Studer <greg@10gen.com> | 2014-01-14 14:41:04 -0500 |
commit | 13226efcd6638b707e0f4a64564d655cea9bc9c7 (patch) | |
tree | 8f8a80c77f7048116f6d63b9d8d349de36189c73 /src | |
parent | 20a414a4809aa5d20f3a09f129f08c918cddf032 (diff) | |
download | mongo-13226efcd6638b707e0f4a64564d655cea9bc9c7.tar.gz |
SERVER-12274 refactor validation out of waiting for write concern
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/base/error_codes.err | 1 | ||||
-rw-r--r-- | src/mongo/db/commands/get_last_error.cpp | 115 | ||||
-rw-r--r-- | src/mongo/db/commands/write_commands/batch_executor.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/repl/write_concern.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/write_concern.cpp | 201 | ||||
-rw-r--r-- | src/mongo/db/write_concern.h | 13 | ||||
-rw-r--r-- | src/mongo/db/write_concern_options.cpp | 34 | ||||
-rw-r--r-- | src/mongo/shell/batch_api.js | 3 |
8 files changed, 222 insertions, 180 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 9a122408e98..c3a27fb0de4 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -78,6 +78,7 @@ error_code("WriteConcernLegacyOK", 75) error_code("NoReplicationEnabled", 76) error_code("OperationIncomplete", 77) error_code("CommandResultSchemaViolation", 78) +error_code("UnknownReplWriteConcern", 79) # Non-sequential error codes (for compatibility only) error_code("DuplicateKey", 11000) diff --git a/src/mongo/db/commands/get_last_error.cpp b/src/mongo/db/commands/get_last_error.cpp index a3202fa0237..1d7ea39abf1 100644 --- a/src/mongo/db/commands/get_last_error.cpp +++ b/src/mongo/db/commands/get_last_error.cpp @@ -93,49 +93,94 @@ namespace mongo { << " { w:'majority' } - await replication to majority of set\n" << " { wtimeout:m} - timeout for w in m milliseconds"; } - bool run(const string& dbname, BSONObj& _cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - LastError *le = lastError.disableForCommand(); - bool errorOccured = false; - if ( le->nPrev != 1 ) { - errorOccured = LastError::noError.appendSelf( result , false ); - le->appendSelfStatus( result ); - } - else { - errorOccured = le->appendSelf( result , false ); - } + bool run( const string& dbname, + BSONObj& cmdObj, + int, + string& errmsg, + BSONObjBuilder& result, + bool fromRepl ) { + + // + // Correct behavior here is very finicky. + // + // 1. The first step is to append the error that occurred on the previous operation. + // This adds an "err" field to the command, which is *not* the command failing. + // + // 2. Next we parse and validate write concern options. If these options are invalid + // the command fails no matter what, even if we actually had an error earlier. The + // reason for checking here is to match legacy behavior on these kind of failures - + // we'll still get an "err" field for the write error. + // + // 3. If we had an error on the previous operation, we then return immediately. + // + // 4. Finally, we actually enforce the write concern. All errors *except* timeout are + // reported with ok : 0.0, to match legacy behavior. + // + // There is a special case when "wOpTime" is explicitly provided by the user - in this + // we *only* enforce the write concern if it is valid. + // + + LastError *le = lastError.disableForCommand(); + // Always append lastOp and connectionId Client& c = cc(); c.appendLastOp( result ); - result.appendNumber( "connectionId" , c.getConnectionId() ); // for sharding; also useful in general for debugging - - BSONObj cmdObj = _cmdObj; - { - BSONObj::iterator i(_cmdObj); - i.next(); - if( !i.more() ) { - /* empty, use default */ - BSONObj *def = getLastErrorDefault; - if( def ) - cmdObj = *def; + // for sharding; also useful in general for debugging + result.appendNumber( "connectionId" , c.getConnectionId() ); + + bool errorOccured = false; + BSONObj writeConcernDoc = cmdObj; + + // Errors aren't reported when wOpTime is used + if ( cmdObj["wOpTime"].eoo() ) { + if ( le->nPrev != 1 ) { + errorOccured = LastError::noError.appendSelf( result, false ); + le->appendSelfStatus( result ); + } + else { + errorOccured = le->appendSelf( result, false ); } } + // Use the default options if we have no gle options aside from wOpTime + bool useDefaultGLEOptions = cmdObj.nFields() == 1 + || ( cmdObj.nFields() == 2 && !cmdObj["wOpTime"].eoo() ); + + if ( useDefaultGLEOptions && getLastErrorDefault ) { + writeConcernDoc = *getLastErrorDefault; + } + + // + // Validate write concern no matter what, this matches 2.4 behavior + // + WriteConcernOptions writeConcern; - Status status = writeConcern.parse( cmdObj ); + Status status = writeConcern.parse( writeConcernDoc ); + + if ( status.isOK() ) { + // Ensure options are valid for this host + status = validateWriteConcern( writeConcern ); + } + if ( !status.isOK() ) { - result.append( "badGLE", cmdObj ); - errmsg = status.toString(); - return false; + result.append( "badGLE", writeConcernDoc ); + return appendCommandStatus( result, status ); + } + + // Don't wait for replication if there was an error reported - this matches 2.4 behavior + if ( errorOccured ) { + dassert( cmdObj["wOpTime"].eoo() ); + return true; } - // Get the wOpTime from the GLE if it exists OpTime wOpTime; if ( cmdObj["wOpTime"].type() == Timestamp ) { + // Get the wOpTime from the command if it exists wOpTime = OpTime( cmdObj["wOpTime"].date() ); } - if ( wOpTime.isNull() ) { + else { // Use the client opTime if no wOpTime is specified wOpTime = cc().getLastOp(); } @@ -145,18 +190,14 @@ namespace mongo { WriteConcernResult res; status = waitForWriteConcern( writeConcern, wOpTime, &res ); - if ( !errorOccured ) { - // Error information fields from write concern can clash with the error from the - // actual write, so don't append if an error occurred on the previous operation. - // Note: In v2.4, the server only waits for the journal commmit or fsync and does - // not wait for replication when an error occurred on the previous operation. - res.appendTo( &result ); - } - - if ( status.code() == ErrorCodes::WriteConcernLegacyOK ) { - result.append( "wnote", status.toString() ); + // For backward compatibility with 2.4, wtimeout returns ok : 1.0 + if ( res.wTimedOut ) { + dassert( !status.isOK() ); + result.append( "errmsg", "timed out waiting for slaves" ); + result.append( "code", status.code() ); return true; } + return appendCommandStatus( result, status ); } diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp index b2f8150433c..45187098a75 100644 --- a/src/mongo/db/commands/write_commands/batch_executor.cpp +++ b/src/mongo/db/commands/write_commands/batch_executor.cpp @@ -69,28 +69,13 @@ namespace mongo { static WCErrorDetail* toWriteConcernError( const Status& wcStatus, const WriteConcernResult& wcResult ) { - // Error reported is either the errmsg or err from wc - string errMsg; - if ( !wcStatus.isOK() ) - errMsg = wcStatus.toString(); - else if ( wcResult.err.size() ) - errMsg = wcResult.err; - - if ( errMsg.empty() ) - return NULL; - WCErrorDetail* wcError = new WCErrorDetail; - if ( wcStatus.isOK() ) - wcError->setErrCode( ErrorCodes::WriteConcernFailed ); - else - wcError->setErrCode( wcStatus.code() ); - + wcError->setErrCode( wcStatus.code() ); + wcError->setErrMessage( wcStatus.reason() ); if ( wcResult.wTimedOut ) wcError->setErrInfo( BSON( "wtimeout" << true ) ); - wcError->setErrMessage( errMsg ); - return wcError; } @@ -118,6 +103,10 @@ namespace mongo { status = writeConcern.parse( _defaultWriteConcern ); } + if ( status.isOK() ) { + status = validateWriteConcern( writeConcern ); + } + if ( !status.isOK() ) { response->setErrCode( status.code() ); response->setErrMessage( status.reason() ); @@ -161,7 +150,9 @@ namespace mongo { WriteConcernResult res; status = waitForWriteConcern( writeConcern, _client->getLastOp(), &res ); - wcError.reset( toWriteConcernError( status, res ) ); + if ( !status.isOK() ) { + wcError.reset( toWriteConcernError( status, res ) ); + } } // diff --git a/src/mongo/db/repl/write_concern.cpp b/src/mongo/db/repl/write_concern.cpp index ed123cc354b..0b8378a3016 100644 --- a/src/mongo/db/repl/write_concern.cpp +++ b/src/mongo/db/repl/write_concern.cpp @@ -184,14 +184,16 @@ namespace mongo { } map<string,ReplSetConfig::TagRule*>::const_iterator it = theReplSet->config().rules.find(wStr); - uassert(14830, str::stream() << "unrecognized getLastError mode: " << wStr, + uassert(ErrorCodes::UnknownReplWriteConcern, + str::stream() << "unrecognized getLastError mode: " << wStr, it != theReplSet->config().rules.end()); return op <= (*it).second->last; } bool replicatedToNum(OpTime& op, int w) { - massert( 16805, "replicatedToNum called but not master anymore", _isMaster() ); + massert( ErrorCodes::NotMaster, + "replicatedToNum called but not master anymore", _isMaster() ); if ( w <= 1 ) return true; @@ -202,7 +204,7 @@ namespace mongo { } bool waitForReplication(OpTime& op, int w, int maxSecondsToWait) { - static const int noLongerMasterAssertCode = 16806; + static const int noLongerMasterAssertCode = ErrorCodes::NotMaster; massert(noLongerMasterAssertCode, "waitForReplication called but not master anymore", _isMaster() ); diff --git a/src/mongo/db/write_concern.cpp b/src/mongo/db/write_concern.cpp index 8aa2003e080..c52cfb545e3 100644 --- a/src/mongo/db/write_concern.cpp +++ b/src/mongo/db/write_concern.cpp @@ -43,40 +43,37 @@ namespace mongo { static Counter64 gleWtimeouts; static ServerStatusMetricField<Counter64> gleWtimeoutsDisplay( "getLastError.wtimeouts", &gleWtimeouts ); - Status WriteConcernOptions::parse( const BSONObj& obj ) { - bool j = obj["j"].trueValue(); - bool fsync = obj["fsync"].trueValue(); + Status validateWriteConcern( const WriteConcernOptions& writeConcern ) { - if ( j & fsync ) - return Status( ErrorCodes::BadValue, "fsync and j options cannot be used together" ); + const bool isJournalEnabled = getDur().isDurable(); - if ( j ) { - syncMode = JOURNAL; - } - if ( fsync ) { - if ( getDur().isDurable() ) - syncMode = JOURNAL; - else - syncMode = FSYNC; + if ( writeConcern.syncMode == WriteConcernOptions::JOURNAL && !isJournalEnabled ) { + return Status( ErrorCodes::BadValue, + "cannot use 'j' option when a host does not have journaling enabled" ); } + const bool isConfigServer = serverGlobalParams.configsvr; + const bool isMasterSlaveNode = anyReplEnabled() && !theReplSet; + const bool isReplSetNode = anyReplEnabled() && theReplSet; - BSONElement e = obj["w"]; - if ( e.isNumber() ) { - wNumNodes = e.numberInt(); - } - else if ( e.type() == String ) { - wMode = e.valuestrsafe(); - } - else if ( e.eoo() || - e.type() == jstNULL || - e.type() == Undefined ) { - } - else { - return Status( ErrorCodes::BadValue, "w has to be a number or a string" ); + if ( isConfigServer || ( !isMasterSlaveNode && !isReplSetNode ) ) { + + // Note that config servers can be replicated (have an oplog), but we still don't allow + // w > 1 + + if ( writeConcern.wNumNodes > 1 ) { + return Status( ErrorCodes::BadValue, + string( "cannot use 'w' > 1 " ) + + ( isConfigServer ? "on a config server host" : + "when a host is not replicated" ) ); + } } - wTimeout = obj["wtimeout"].numberInt(); + if ( !isReplSetNode && !writeConcern.wMode.empty() && writeConcern.wMode != "majority" ) { + return Status( ErrorCodes::BadValue, + string( "cannot use non-majority 'w' mode " ) + writeConcern.wMode + + " when a host is not a member of a replica set" ); + } return Status::OK(); } @@ -113,128 +110,98 @@ namespace mongo { const OpTime& replOpTime, WriteConcernResult* result ) { - // first handle blocking on disk + // We assume all options have been validated earlier, if not, programming error + dassert( validateWriteConcern( writeConcern ).isOK() ); + + // Next handle blocking on disk Timer syncTimer; + switch( writeConcern.syncMode ) { case WriteConcernOptions::NONE: break; - case WriteConcernOptions::JOURNAL: - if ( getDur().awaitCommit() ) { - // success - break; - } - result->err = "nojournal"; - return Status( ErrorCodes::BadValue, "journaling not enabled" ); case WriteConcernOptions::FSYNC: - result->fsyncFiles = MemoryMappedFile::flushAll( true ); + if ( !getDur().isDurable() ) { + result->fsyncFiles = MemoryMappedFile::flushAll( true ); + } + else { + // We only need to commit the journal if we're durable + getDur().awaitCommit(); + } + break; + case WriteConcernOptions::JOURNAL: + getDur().awaitCommit(); break; } + result->syncMillis = syncTimer.millis(); - // now wait for replication + // Now wait for replication - if ( writeConcern.wNumNodes <= 1 && - writeConcern.wMode.empty() ) { - // no desired replication check + if ( replOpTime.isNull() ) { + // no write happened for this client yet return Status::OK(); } - if ( serverGlobalParams.configsvr ) { - // config servers have special rules - if ( writeConcern.wNumNodes > 1 ) { - result->err = "norepl"; - return Status( ErrorCodes::WriteConcernLegacyOK, - "cannot use w > 1 with config servers" ); - } - if ( writeConcern.wMode == "majority" ) { - // majority is ok for single nodes, as 1/1 > .5 - return Status::OK(); - } - result->err = "norepl"; - return Status( ErrorCodes::BadValue, - str::stream() << "unknown w mode for config servers " - << "(" << writeConcern.wMode << ")" ); + if ( writeConcern.wNumNodes <= 1 && writeConcern.wMode.empty() ) { + // no desired replication check + return Status::OK(); } - if ( !anyReplEnabled() ) { - // no replication enabled and not a config server - // so we handle some simple things, or fail - - if ( writeConcern.wNumNodes > 1 ) { - result->err = "norepl"; - return Status( ErrorCodes::WriteConcernLegacyOK, - str::stream() << "no replication and asked for w > 1 " - << "(" << writeConcern.wNumNodes << ")" ); - } - if ( !writeConcern.wMode.empty() && - writeConcern.wMode != "majority" ) { - result->err = "norepl"; - return Status( ErrorCodes::WriteConcernLegacyOK, - "no replication and asked for w with a mode" ); - } - - // asked for w <= 1 or w=majority - // so we can just say ok + if ( !anyReplEnabled() || serverGlobalParams.configsvr ) { + // no replication check needed (validated above) return Status::OK(); } - bool doTiming = writeConcern.wNumNodes > 1 || !writeConcern.wMode.empty(); - scoped_ptr<TimerHolder> gleTimerHolder( new TimerHolder( doTiming ? &gleWtimeStats : NULL ) ); - - if ( replOpTime.isNull() ) { - // no write happened for this client yet + const bool isMasterSlaveNode = anyReplEnabled() && !theReplSet; + if ( writeConcern.wMode == "majority" && isMasterSlaveNode ) { + // with master/slave, majority is equivalent to w=1 return Status::OK(); } - if ( !writeConcern.wMode.empty() && !theReplSet ) { - if ( writeConcern.wMode == "majority" ) { - // with master/slave, majority is equivilant to w=1 - return Status::OK(); - } - return Status( ErrorCodes::BadValue, - str::stream() << "asked for a w mode with master/slave " - << "[" << writeConcern.wMode << "]" ); - } + // We're sure that replication is enabled and that we have more than one node or a wMode + TimerHolder gleTimerHolder( &gleWtimeStats ); - // now that we've done the prep, now we actually wait - while ( 1 ) { + // Now we wait for replication + // Note that replica set stepdowns and gle mode changes are thrown as errors + // TODO: Make this cleaner + Status replStatus = Status::OK(); + try { + while ( 1 ) { - if ( !_isMaster() ) { - // this should be in the while loop in case we step down - return Status( ErrorCodes::NotMaster, "no longer primary" ); - } + if ( writeConcern.wNumNodes > 0 ) { + if ( opReplicatedEnough( replOpTime, writeConcern.wNumNodes ) ) { + break; + } + } + else if ( opReplicatedEnough( replOpTime, writeConcern.wMode ) ) { + break; + } - if ( writeConcern.wNumNodes > 0 ) { - if ( opReplicatedEnough( replOpTime, writeConcern.wNumNodes ) ) { + if ( writeConcern.wTimeout > 0 && + gleTimerHolder.millis() >= writeConcern.wTimeout ) { + gleWtimeouts.increment(); + result->err = "timeout"; + result->wTimedOut = true; + replStatus = Status( ErrorCodes::WriteConcernFailed, + "waiting for replication timed out" ); break; } - } - else if ( opReplicatedEnough( replOpTime, writeConcern.wMode ) ) { - break; - } - if ( writeConcern.wTimeout > 0 && - gleTimerHolder->millis() >= writeConcern.wTimeout ) { - gleWtimeouts.increment(); - result->wTime = gleTimerHolder->millis(); - result->writtenTo = getHostsWrittenTo( replOpTime ); - result->err = "timeout"; - result->wTimedOut = true; - return Status( ErrorCodes::WriteConcernLegacyOK, - "waiting for replication timed out" ); + sleepmillis(1); + killCurrentOp.checkForInterrupt(); } - - sleepmillis(1); - killCurrentOp.checkForInterrupt(); } - - if ( doTiming ) { - result->writtenTo = getHostsWrittenTo( replOpTime ); - result->wTime = gleTimerHolder->recordMillis(); + catch( const AssertionException& ex ) { + // Our replication state changed while enforcing write concern + replStatus = ex.toStatus(); } - return Status::OK(); + // Add stats + result->writtenTo = getHostsWrittenTo( replOpTime ); + result->wTime = gleTimerHolder.recordMillis(); + + return replStatus; } } // namespace mongo diff --git a/src/mongo/db/write_concern.h b/src/mongo/db/write_concern.h index 6253bfd5107..69c438d3ba3 100644 --- a/src/mongo/db/write_concern.h +++ b/src/mongo/db/write_concern.h @@ -32,6 +32,11 @@ namespace mongo { + /** + * Verifies that a WriteConcern is valid for this particular host. + */ + Status validateWriteConcern( const WriteConcernOptions& writeConcern ); + struct WriteConcernResult { WriteConcernResult() { reset(); @@ -59,15 +64,15 @@ namespace mongo { /** * Blocks until the database is sure the specified user write concern has been fulfilled, or - * returns an error status if the write concern fails. + * returns an error status if the write concern fails. Does no validation of the input write + * concern, it is an error to pass this function an invalid write concern for the host. * * Takes a user write concern as well as the replication opTime the write concern applies to - * if this opTime.isNull() no replication-related write concern options will be enforced. * * Returns result of the write concern if successful. - * Returns !OK if anything goes wrong. - * Returns WriteConcernLegacyOK if the write concern could not be applied but legacy GLE should - * not report an error. + * Returns NotMaster if the host steps down while waiting for replication + * Returns UnknownReplWriteConcern if the wMode specified was not enforceable */ Status waitForWriteConcern( const WriteConcernOptions& writeConcern, const OpTime& replOpTime, diff --git a/src/mongo/db/write_concern_options.cpp b/src/mongo/db/write_concern_options.cpp index 8597e55d28f..2658797e5e3 100644 --- a/src/mongo/db/write_concern_options.cpp +++ b/src/mongo/db/write_concern_options.cpp @@ -36,4 +36,38 @@ namespace mongo { const BSONObj WriteConcernOptions::AllConfigs = BSONObj(); const BSONObj WriteConcernOptions::Unacknowledged(BSON("w" << W_NONE)); + Status WriteConcernOptions::parse( const BSONObj& obj ) { + + bool j = obj["j"].trueValue(); + bool fsync = obj["fsync"].trueValue(); + + if ( j & fsync ) + return Status( ErrorCodes::BadValue, "fsync and j options cannot be used together" ); + + if ( j ) { + syncMode = JOURNAL; + } + if ( fsync ) { + syncMode = FSYNC; + } + + BSONElement e = obj["w"]; + if ( e.isNumber() ) { + wNumNodes = e.numberInt(); + } + else if ( e.type() == String ) { + wMode = e.valuestrsafe(); + } + else if ( e.eoo() || + e.type() == jstNULL || + e.type() == Undefined ) { + } + else { + return Status( ErrorCodes::BadValue, "w has to be a number or a string" ); + } + + wTimeout = obj["wtimeout"].numberInt(); + + return Status::OK(); + } } diff --git a/src/mongo/shell/batch_api.js b/src/mongo/shell/batch_api.js index 1d0a8e8f6c4..92f769fc0f9 100644 --- a/src/mongo/shell/batch_api.js +++ b/src/mongo/shell/batch_api.js @@ -238,6 +238,7 @@ var _batch_api_module = (function() { // Define properties defineReadOnlyProperty(this, "code", err.code); + defineReadOnlyProperty(this, "errInfo", err.errInfo); defineReadOnlyProperty(this, "errmsg", err.errmsg); this.tojson = function() { @@ -245,7 +246,7 @@ var _batch_api_module = (function() { } this.toString = function() { - return "WriteConcernError(" + err.errmsg + ")"; + return "WriteConcernError(" + tojson(err) + ")"; } this.shellPrint = function() { |