diff options
Diffstat (limited to 'src/mongo/s/d_split.cpp')
-rw-r--r-- | src/mongo/s/d_split.cpp | 1453 |
1 files changed, 744 insertions, 709 deletions
diff --git a/src/mongo/s/d_split.cpp b/src/mongo/s/d_split.cpp index 1cb2504ab8c..ba1022f0513 100644 --- a/src/mongo/s/d_split.cpp +++ b/src/mongo/s/d_split.cpp @@ -50,7 +50,7 @@ #include "mongo/db/instance.h" #include "mongo/db/jsobj.h" #include "mongo/db/query/internal_plans.h" -#include "mongo/s/chunk.h" // for static genID only +#include "mongo/s/chunk.h" // for static genID only #include "mongo/s/chunk_version.h" #include "mongo/s/config.h" #include "mongo/s/d_state.h" @@ -62,861 +62,896 @@ namespace mongo { - using std::auto_ptr; - using std::endl; - using std::ostringstream; - using std::set; - using std::string; - using std::stringstream; - using std::vector; - - class CmdMedianKey : public Command { - public: - CmdMedianKey() : Command( "medianKey" ) {} - virtual bool slaveOk() const { return true; } - virtual bool isWriteCommandForConfigServer() const { return false; } - virtual void help( stringstream &help ) const { - help << "Deprecated internal command. Use splitVector command instead. \n"; - } - // No auth required as this command no longer does anything. - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) {} - bool run(OperationContext* txn, const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { - errmsg = "medianKey command no longer supported. Calling this indicates mismatch between mongo versions."; +using std::auto_ptr; +using std::endl; +using std::ostringstream; +using std::set; +using std::string; +using std::stringstream; +using std::vector; + +class CmdMedianKey : public Command { +public: + CmdMedianKey() : Command("medianKey") {} + virtual bool slaveOk() const { + return true; + } + virtual bool isWriteCommandForConfigServer() const { + return false; + } + virtual void help(stringstream& help) const { + help << "Deprecated internal command. Use splitVector command instead. \n"; + } + // No auth required as this command no longer does anything. + virtual void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) {} + bool run(OperationContext* txn, + const string& dbname, + BSONObj& jsobj, + int, + string& errmsg, + BSONObjBuilder& result, + bool fromRepl) { + errmsg = + "medianKey command no longer supported. Calling this indicates mismatch between mongo " + "versions."; + return false; + } +} cmdMedianKey; + +class CheckShardingIndex : public Command { +public: + CheckShardingIndex() : Command("checkShardingIndex", false) {} + virtual bool slaveOk() const { + return false; + } + virtual bool isWriteCommandForConfigServer() const { + return false; + } + virtual void help(stringstream& help) const { + help << "Internal command.\n"; + } + virtual void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) { + ActionSet actions; + actions.addAction(ActionType::find); + out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); + } + + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const { + return parseNsFullyQualified(dbname, cmdObj); + } + + bool run(OperationContext* txn, + const string& dbname, + BSONObj& jsobj, + int, + string& errmsg, + BSONObjBuilder& result, + bool fromRepl) { + std::string ns = parseNs(dbname, jsobj); + BSONObj keyPattern = jsobj.getObjectField("keyPattern"); + + if (keyPattern.isEmpty()) { + errmsg = "no key pattern found in checkShardingindex"; return false; } - } cmdMedianKey; - class CheckShardingIndex : public Command { - public: - CheckShardingIndex() : Command( "checkShardingIndex" , false ) {} - virtual bool slaveOk() const { return false; } - virtual bool isWriteCommandForConfigServer() const { return false; } - virtual void help( stringstream &help ) const { - help << "Internal command.\n"; + if (keyPattern.nFields() == 1 && str::equals("_id", keyPattern.firstElementFieldName())) { + result.appendBool("idskip", true); + return true; + } + + BSONObj min = jsobj.getObjectField("min"); + BSONObj max = jsobj.getObjectField("max"); + if (min.isEmpty() != max.isEmpty()) { + errmsg = "either provide both min and max or leave both empty"; + return false; } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { - ActionSet actions; - actions.addAction(ActionType::find); - out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); + + AutoGetCollectionForRead ctx(txn, ns); + Collection* collection = ctx.getCollection(); + if (!collection) { + errmsg = "ns not found"; + return false; } - std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const { - return parseNsFullyQualified(dbname, cmdObj); + IndexDescriptor* idx = collection->getIndexCatalog()->findIndexByPrefix( + txn, keyPattern, true); /* require single key */ + if (idx == NULL) { + errmsg = "couldn't find valid index for shard key"; + return false; + } + // extend min to get (min, MinKey, MinKey, ....) + KeyPattern kp(idx->keyPattern()); + min = Helpers::toKeyFormat(kp.extendRangeBound(min, false)); + if (max.isEmpty()) { + // if max not specified, make it (MaxKey, Maxkey, MaxKey...) + max = Helpers::toKeyFormat(kp.extendRangeBound(max, true)); + } else { + // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive + max = Helpers::toKeyFormat(kp.extendRangeBound(max, false)); } - bool run(OperationContext* txn, const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + auto_ptr<PlanExecutor> exec(InternalPlanner::indexScan( + txn, collection, idx, min, max, false, InternalPlanner::FORWARD)); + exec->setYieldPolicy(PlanExecutor::YIELD_AUTO); + + // Find the 'missingField' value used to represent a missing document field in a key of + // this index. + // NOTE A local copy of 'missingField' is made because indices may be + // invalidated during a db lock yield. + BSONObj missingFieldObj = IndexLegacy::getMissingField(txn, collection, idx->infoObj()); + BSONElement missingField = missingFieldObj.firstElement(); + + // for now, the only check is that all shard keys are filled + // a 'missingField' valued index key is ok if the field is present in the document, + // TODO if $exist for nulls were picking the index, it could be used instead efficiently + int keyPatternLength = keyPattern.nFields(); + + RecordId loc; + BSONObj currKey; + while (PlanExecutor::ADVANCED == exec->getNext(&currKey, &loc)) { + // check that current key contains non missing elements for all fields in keyPattern + BSONObjIterator i(currKey); + for (int k = 0; k < keyPatternLength; k++) { + if (!i.more()) { + errmsg = str::stream() << "index key " << currKey << " too short for pattern " + << keyPattern; + return false; + } + BSONElement currKeyElt = i.next(); - std::string ns = parseNs(dbname, jsobj); - BSONObj keyPattern = jsobj.getObjectField( "keyPattern" ); + if (!currKeyElt.eoo() && !currKeyElt.valuesEqual(missingField)) + continue; - if ( keyPattern.isEmpty() ) { - errmsg = "no key pattern found in checkShardingindex"; - return false; - } + // This is a fetch, but it's OK. The underlying code won't throw a page fault + // exception. + BSONObj obj = collection->docFor(txn, loc).value(); + BSONObjIterator j(keyPattern); + BSONElement real; + for (int x = 0; x <= k; x++) + real = j.next(); - if ( keyPattern.nFields() == 1 && str::equals( "_id" , keyPattern.firstElementFieldName() ) ) { - result.appendBool( "idskip" , true ); - return true; - } + real = obj.getFieldDotted(real.fieldName()); + + if (real.type()) + continue; + + ostringstream os; + os << "found missing value in key " << currKey << " for doc: " + << (obj.hasField("_id") ? obj.toString() : obj["_id"].toString()); + log() << "checkShardingIndex for '" << ns << "' failed: " << os.str() << endl; - BSONObj min = jsobj.getObjectField( "min" ); - BSONObj max = jsobj.getObjectField( "max" ); - if ( min.isEmpty() != max.isEmpty() ) { - errmsg = "either provide both min and max or leave both empty"; + errmsg = os.str(); return false; } + } + + return true; + } +} cmdCheckShardingIndex; + +BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) { + return key.replaceFieldNames(keyPattern).clientReadable(); +} +class SplitVector : public Command { +public: + SplitVector() : Command("splitVector", false) {} + virtual bool slaveOk() const { + return false; + } + virtual bool isWriteCommandForConfigServer() const { + return false; + } + virtual void help(stringstream& help) const { + help << "Internal command.\n" + "examples:\n" + " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, " + "maxChunkSize:200 }\n" + " maxChunkSize unit in MBs\n" + " May optionally specify 'maxSplitPoints' and 'maxChunkObjects' to avoid " + "traversing the whole chunk\n" + " \n" + " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, " + "force: true }\n" + " 'force' will produce one split point even if data is small; defaults to false\n" + "NOTE: This command may take a while to run"; + } + virtual Status checkAuthForCommand(ClientBasic* client, + const std::string& dbname, + const BSONObj& cmdObj) { + if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource( + ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))), + ActionType::splitVector)) { + return Status(ErrorCodes::Unauthorized, "Unauthorized"); + } + return Status::OK(); + } + virtual std::string parseNs(const string& dbname, const BSONObj& cmdObj) const { + return parseNsFullyQualified(dbname, cmdObj); + } + bool run(OperationContext* txn, + const string& dbname, + BSONObj& jsobj, + int, + string& errmsg, + BSONObjBuilder& result, + bool fromRepl) { + // + // 1.a We'll parse the parameters in two steps. First, make sure the we can use the split index to get + // a good approximation of the size of the chunk -- without needing to access the actual data. + // + + const std::string ns = parseNs(dbname, jsobj); + BSONObj keyPattern = jsobj.getObjectField("keyPattern"); + + if (keyPattern.isEmpty()) { + errmsg = "no key pattern found in splitVector"; + return false; + } + + // If min and max are not provided use the "minKey" and "maxKey" for the sharding key pattern. + BSONObj min = jsobj.getObjectField("min"); + BSONObj max = jsobj.getObjectField("max"); + if (min.isEmpty() != max.isEmpty()) { + errmsg = "either provide both min and max or leave both empty"; + return false; + } + + long long maxSplitPoints = 0; + BSONElement maxSplitPointsElem = jsobj["maxSplitPoints"]; + if (maxSplitPointsElem.isNumber()) { + maxSplitPoints = maxSplitPointsElem.numberLong(); + } + + long long maxChunkObjects = Chunk::MaxObjectPerChunk; + BSONElement MaxChunkObjectsElem = jsobj["maxChunkObjects"]; + if (MaxChunkObjectsElem.isNumber()) { + maxChunkObjects = MaxChunkObjectsElem.numberLong(); + } + + vector<BSONObj> splitKeys; + + { + // Get the size estimate for this namespace AutoGetCollectionForRead ctx(txn, ns); Collection* collection = ctx.getCollection(); - if ( !collection ) { + if (!collection) { errmsg = "ns not found"; return false; } - IndexDescriptor *idx = - collection->getIndexCatalog()->findIndexByPrefix( txn, - keyPattern, - true ); /* require single key */ - if ( idx == NULL ) { - errmsg = "couldn't find valid index for shard key"; + // Allow multiKey based on the invariant that shard keys must be single-valued. + // Therefore, any multi-key index prefixed by shard key cannot be multikey over + // the shard key fields. + IndexDescriptor* idx = + collection->getIndexCatalog()->findIndexByPrefix(txn, keyPattern, false); + if (idx == NULL) { + errmsg = (string) "couldn't find index over splitting key " + + keyPattern.clientReadable().toString(); return false; } // extend min to get (min, MinKey, MinKey, ....) - KeyPattern kp( idx->keyPattern() ); - min = Helpers::toKeyFormat( kp.extendRangeBound( min, false ) ); - if ( max.isEmpty() ) { + KeyPattern kp(idx->keyPattern()); + min = Helpers::toKeyFormat(kp.extendRangeBound(min, false)); + if (max.isEmpty()) { // if max not specified, make it (MaxKey, Maxkey, MaxKey...) - max = Helpers::toKeyFormat( kp.extendRangeBound( max, true ) ); + max = Helpers::toKeyFormat(kp.extendRangeBound(max, true)); } else { // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive - max = Helpers::toKeyFormat( kp.extendRangeBound( max, false ) ); + max = Helpers::toKeyFormat(kp.extendRangeBound(max, false)); } - auto_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn, collection, idx, - min, max, false, - InternalPlanner::FORWARD)); - exec->setYieldPolicy(PlanExecutor::YIELD_AUTO); + const long long recCount = collection->numRecords(txn); + const long long dataSize = collection->dataSize(txn); - // Find the 'missingField' value used to represent a missing document field in a key of - // this index. - // NOTE A local copy of 'missingField' is made because indices may be - // invalidated during a db lock yield. - BSONObj missingFieldObj = IndexLegacy::getMissingField(txn, collection, idx->infoObj()); - BSONElement missingField = missingFieldObj.firstElement(); - - // for now, the only check is that all shard keys are filled - // a 'missingField' valued index key is ok if the field is present in the document, - // TODO if $exist for nulls were picking the index, it could be used instead efficiently - int keyPatternLength = keyPattern.nFields(); - - RecordId loc; - BSONObj currKey; - while (PlanExecutor::ADVANCED == exec->getNext(&currKey, &loc)) { - //check that current key contains non missing elements for all fields in keyPattern - BSONObjIterator i( currKey ); - for( int k = 0; k < keyPatternLength ; k++ ) { - if( ! i.more() ) { - errmsg = str::stream() << "index key " << currKey - << " too short for pattern " << keyPattern; - return false; + // + // 1.b Now that we have the size estimate, go over the remaining parameters and apply any maximum size + // restrictions specified there. + // + + // 'force'-ing a split is equivalent to having maxChunkSize be the size of the current chunk, i.e., the + // logic below will split that chunk in half + long long maxChunkSize = 0; + bool forceMedianSplit = false; + { + BSONElement maxSizeElem = jsobj["maxChunkSize"]; + BSONElement forceElem = jsobj["force"]; + + if (forceElem.trueValue()) { + forceMedianSplit = true; + // This chunk size is effectively ignored if force is true + maxChunkSize = dataSize; + + } else if (maxSizeElem.isNumber()) { + maxChunkSize = maxSizeElem.numberLong() * 1 << 20; + + } else { + maxSizeElem = jsobj["maxChunkSizeBytes"]; + if (maxSizeElem.isNumber()) { + maxChunkSize = maxSizeElem.numberLong(); } - BSONElement currKeyElt = i.next(); - - if ( !currKeyElt.eoo() && !currKeyElt.valuesEqual( missingField ) ) - continue; - - // This is a fetch, but it's OK. The underlying code won't throw a page fault - // exception. - BSONObj obj = collection->docFor(txn, loc).value(); - BSONObjIterator j( keyPattern ); - BSONElement real; - for ( int x=0; x <= k; x++ ) - real = j.next(); - - real = obj.getFieldDotted( real.fieldName() ); - - if ( real.type() ) - continue; - - ostringstream os; - os << "found missing value in key " << currKey << " for doc: " - << ( obj.hasField( "_id" ) ? obj.toString() : obj["_id"].toString() ); - log() << "checkShardingIndex for '" << ns << "' failed: " << os.str() << endl; - - errmsg = os.str(); + } + + // We need a maximum size for the chunk, unless we're not actually capable of finding any + // split points. + if (maxChunkSize <= 0 && recCount != 0) { + errmsg = + "need to specify the desired max chunk size (maxChunkSize or " + "maxChunkSizeBytes)"; return false; } } - return true; - } - } cmdCheckShardingIndex; - BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) { - return key.replaceFieldNames(keyPattern).clientReadable(); - } + // If there's not enough data for more than one chunk, no point continuing. + if (dataSize < maxChunkSize || recCount == 0) { + vector<BSONObj> emptyVector; + result.append("splitKeys", emptyVector); + return true; + } - class SplitVector : public Command { - public: - SplitVector() : Command( "splitVector" , false ) {} - virtual bool slaveOk() const { return false; } - virtual bool isWriteCommandForConfigServer() const { return false; } - virtual void help( stringstream &help ) const { - help << - "Internal command.\n" - "examples:\n" - " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, maxChunkSize:200 }\n" - " maxChunkSize unit in MBs\n" - " May optionally specify 'maxSplitPoints' and 'maxChunkObjects' to avoid traversing the whole chunk\n" - " \n" - " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, force: true }\n" - " 'force' will produce one split point even if data is small; defaults to false\n" - "NOTE: This command may take a while to run"; - } - virtual Status checkAuthForCommand(ClientBasic* client, - const std::string& dbname, - const BSONObj& cmdObj) { - if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource( - ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))), - ActionType::splitVector)) { - return Status(ErrorCodes::Unauthorized, "Unauthorized"); + log() << "request split points lookup for chunk " << ns << " " << min << " -->> " << max + << endl; + + // We'll use the average object size and number of object to find approximately how many keys + // each chunk should have. We'll split at half the maxChunkSize or maxChunkObjects, if + // provided. + const long long avgRecSize = dataSize / recCount; + long long keyCount = maxChunkSize / (2 * avgRecSize); + if (maxChunkObjects && (maxChunkObjects < keyCount)) { + log() << "limiting split vector to " << maxChunkObjects << " (from " << keyCount + << ") objects " << endl; + keyCount = maxChunkObjects; } - return Status::OK(); - } - virtual std::string parseNs(const string& dbname, const BSONObj& cmdObj) const { - return parseNsFullyQualified(dbname, cmdObj); - } - bool run(OperationContext* txn, const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { // - // 1.a We'll parse the parameters in two steps. First, make sure the we can use the split index to get - // a good approximation of the size of the chunk -- without needing to access the actual data. + // 2. Traverse the index and add the keyCount-th key to the result vector. If that key + // appeared in the vector before, we omit it. The invariant here is that all the + // instances of a given key value live in the same chunk. // - const std::string ns = parseNs(dbname, jsobj); - BSONObj keyPattern = jsobj.getObjectField( "keyPattern" ); + Timer timer; + long long currCount = 0; + long long numChunks = 0; - if ( keyPattern.isEmpty() ) { - errmsg = "no key pattern found in splitVector"; - return false; - } + auto_ptr<PlanExecutor> exec(InternalPlanner::indexScan( + txn, collection, idx, min, max, false, InternalPlanner::FORWARD)); - // If min and max are not provided use the "minKey" and "maxKey" for the sharding key pattern. - BSONObj min = jsobj.getObjectField( "min" ); - BSONObj max = jsobj.getObjectField( "max" ); - if ( min.isEmpty() != max.isEmpty() ){ - errmsg = "either provide both min and max or leave both empty"; + BSONObj currKey; + PlanExecutor::ExecState state = exec->getNext(&currKey, NULL); + if (PlanExecutor::ADVANCED != state) { + errmsg = "can't open a cursor for splitting (desired range is possibly empty)"; return false; } - long long maxSplitPoints = 0; - BSONElement maxSplitPointsElem = jsobj[ "maxSplitPoints" ]; - if ( maxSplitPointsElem.isNumber() ) { - maxSplitPoints = maxSplitPointsElem.numberLong(); - } - - long long maxChunkObjects = Chunk::MaxObjectPerChunk; - BSONElement MaxChunkObjectsElem = jsobj[ "maxChunkObjects" ]; - if ( MaxChunkObjectsElem.isNumber() ) { - maxChunkObjects = MaxChunkObjectsElem.numberLong(); - } + // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, to be removed + // at the end. If a key appears more times than entries allowed on a chunk, we issue a warning and + // split on the following key. + set<BSONObj> tooFrequentKeys; + splitKeys.push_back( + prettyKey(idx->keyPattern(), currKey.getOwned()).extractFields(keyPattern)); - vector<BSONObj> splitKeys; + exec->setYieldPolicy(PlanExecutor::YIELD_AUTO); + while (1) { + while (PlanExecutor::ADVANCED == state) { + currCount++; + + if (currCount > keyCount && !forceMedianSplit) { + currKey = prettyKey(idx->keyPattern(), currKey.getOwned()) + .extractFields(keyPattern); + // Do not use this split key if it is the same used in the previous split point. + if (currKey.woCompare(splitKeys.back()) == 0) { + tooFrequentKeys.insert(currKey.getOwned()); + } else { + splitKeys.push_back(currKey.getOwned()); + currCount = 0; + numChunks++; + LOG(4) << "picked a split key: " << currKey << endl; + } + } - { - // Get the size estimate for this namespace - AutoGetCollectionForRead ctx(txn, ns); - Collection* collection = ctx.getCollection(); - if ( !collection ) { - errmsg = "ns not found"; - return false; - } + // Stop if we have enough split points. + if (maxSplitPoints && (numChunks >= maxSplitPoints)) { + log() << "max number of requested split points reached (" << numChunks + << ") before the end of chunk " << ns << " " << min << " -->> " << max + << endl; + break; + } - // Allow multiKey based on the invariant that shard keys must be single-valued. - // Therefore, any multi-key index prefixed by shard key cannot be multikey over - // the shard key fields. - IndexDescriptor *idx = - collection->getIndexCatalog()->findIndexByPrefix( txn, - keyPattern, - false ); - if ( idx == NULL ) { - errmsg = (string)"couldn't find index over splitting key " + - keyPattern.clientReadable().toString(); - return false; - } - // extend min to get (min, MinKey, MinKey, ....) - KeyPattern kp( idx->keyPattern() ); - min = Helpers::toKeyFormat( kp.extendRangeBound ( min, false ) ); - if ( max.isEmpty() ) { - // if max not specified, make it (MaxKey, Maxkey, MaxKey...) - max = Helpers::toKeyFormat( kp.extendRangeBound( max, true ) ); - } else { - // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive - max = Helpers::toKeyFormat( kp.extendRangeBound( max, false ) ); + state = exec->getNext(&currKey, NULL); } - const long long recCount = collection->numRecords(txn); - const long long dataSize = collection->dataSize(txn); + if (!forceMedianSplit) + break; // - // 1.b Now that we have the size estimate, go over the remaining parameters and apply any maximum size - // restrictions specified there. + // If we're forcing a split at the halfway point, then the first pass was just + // to count the keys, and we still need a second pass. // - - // 'force'-ing a split is equivalent to having maxChunkSize be the size of the current chunk, i.e., the - // logic below will split that chunk in half - long long maxChunkSize = 0; - bool forceMedianSplit = false; - { - BSONElement maxSizeElem = jsobj[ "maxChunkSize" ]; - BSONElement forceElem = jsobj[ "force" ]; - - if ( forceElem.trueValue() ) { - forceMedianSplit = true; - // This chunk size is effectively ignored if force is true - maxChunkSize = dataSize; - - } - else if ( maxSizeElem.isNumber() ) { - maxChunkSize = maxSizeElem.numberLong() * 1<<20; - - } - else { - maxSizeElem = jsobj["maxChunkSizeBytes"]; - if ( maxSizeElem.isNumber() ) { - maxChunkSize = maxSizeElem.numberLong(); - } - } - // We need a maximum size for the chunk, unless we're not actually capable of finding any - // split points. - if ( maxChunkSize <= 0 && recCount != 0 ) { - errmsg = "need to specify the desired max chunk size (maxChunkSize or maxChunkSizeBytes)"; - return false; - } - } - - - // If there's not enough data for more than one chunk, no point continuing. - if ( dataSize < maxChunkSize || recCount == 0 ) { - vector<BSONObj> emptyVector; - result.append( "splitKeys" , emptyVector ); - return true; - } - - log() << "request split points lookup for chunk " << ns << " " << min << " -->> " << max << endl; - - // We'll use the average object size and number of object to find approximately how many keys - // each chunk should have. We'll split at half the maxChunkSize or maxChunkObjects, if - // provided. - const long long avgRecSize = dataSize / recCount; - long long keyCount = maxChunkSize / (2 * avgRecSize); - if ( maxChunkObjects && ( maxChunkObjects < keyCount ) ) { - log() << "limiting split vector to " << maxChunkObjects << " (from " << keyCount << ") objects " << endl; - keyCount = maxChunkObjects; - } - - // - // 2. Traverse the index and add the keyCount-th key to the result vector. If that key - // appeared in the vector before, we omit it. The invariant here is that all the - // instances of a given key value live in the same chunk. - // - - Timer timer; - long long currCount = 0; - long long numChunks = 0; - - auto_ptr<PlanExecutor> exec( - InternalPlanner::indexScan(txn, collection, idx, min, max, - false, InternalPlanner::FORWARD)); - - BSONObj currKey; - PlanExecutor::ExecState state = exec->getNext(&currKey, NULL); - if (PlanExecutor::ADVANCED != state) { - errmsg = "can't open a cursor for splitting (desired range is possibly empty)"; - return false; - } - - // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, to be removed - // at the end. If a key appears more times than entries allowed on a chunk, we issue a warning and - // split on the following key. - set<BSONObj> tooFrequentKeys; - splitKeys.push_back(prettyKey(idx->keyPattern(), currKey.getOwned()).extractFields( keyPattern ) ); + forceMedianSplit = false; + keyCount = currCount / 2; + currCount = 0; + log() << "splitVector doing another cycle because of force, keyCount now: " + << keyCount << endl; + + exec.reset(InternalPlanner::indexScan( + txn, collection, idx, min, max, false, InternalPlanner::FORWARD)); exec->setYieldPolicy(PlanExecutor::YIELD_AUTO); - while ( 1 ) { - while (PlanExecutor::ADVANCED == state) { - currCount++; - - if ( currCount > keyCount && !forceMedianSplit ) { - currKey = prettyKey(idx->keyPattern(), currKey.getOwned()).extractFields(keyPattern); - // Do not use this split key if it is the same used in the previous split point. - if ( currKey.woCompare( splitKeys.back() ) == 0 ) { - tooFrequentKeys.insert( currKey.getOwned() ); - } - else { - splitKeys.push_back( currKey.getOwned() ); - currCount = 0; - numChunks++; - LOG(4) << "picked a split key: " << currKey << endl; - } - } + state = exec->getNext(&currKey, NULL); + } - // Stop if we have enough split points. - if ( maxSplitPoints && ( numChunks >= maxSplitPoints ) ) { - log() << "max number of requested split points reached (" << numChunks - << ") before the end of chunk " << ns << " " << min << " -->> " << max - << endl; - break; - } + // + // 3. Format the result and issue any warnings about the data we gathered while traversing the + // index + // - state = exec->getNext(&currKey, NULL); - } - - if ( ! forceMedianSplit ) - break; - - // - // If we're forcing a split at the halfway point, then the first pass was just - // to count the keys, and we still need a second pass. - // + // Warn for keys that are more numerous than maxChunkSize allows. + for (set<BSONObj>::const_iterator it = tooFrequentKeys.begin(); + it != tooFrequentKeys.end(); + ++it) { + warning() << "chunk is larger than " << maxChunkSize << " bytes because of key " + << prettyKey(idx->keyPattern(), *it) << endl; + } - forceMedianSplit = false; - keyCount = currCount / 2; - currCount = 0; - log() << "splitVector doing another cycle because of force, keyCount now: " << keyCount << endl; + // Remove the sentinel at the beginning before returning + splitKeys.erase(splitKeys.begin()); - exec.reset(InternalPlanner::indexScan(txn, collection, idx, min, max, - false, InternalPlanner::FORWARD)); + if (timer.millis() > serverGlobalParams.slowMS) { + warning() << "Finding the split vector for " << ns << " over " << keyPattern + << " keyCount: " << keyCount << " numSplits: " << splitKeys.size() + << " lookedAt: " << currCount << " took " << timer.millis() << "ms" + << endl; + } - exec->setYieldPolicy(PlanExecutor::YIELD_AUTO); - state = exec->getNext(&currKey, NULL); - } + // Warning: we are sending back an array of keys but are currently limited to + // 4MB work of 'result' size. This should be okay for now. - // - // 3. Format the result and issue any warnings about the data we gathered while traversing the - // index - // - - // Warn for keys that are more numerous than maxChunkSize allows. - for ( set<BSONObj>::const_iterator it = tooFrequentKeys.begin(); it != tooFrequentKeys.end(); ++it ) { - warning() << "chunk is larger than " << maxChunkSize - << " bytes because of key " << prettyKey(idx->keyPattern(), *it ) << endl; - } - - // Remove the sentinel at the beginning before returning - splitKeys.erase( splitKeys.begin() ); - - if (timer.millis() > serverGlobalParams.slowMS) { - warning() << "Finding the split vector for " << ns << " over "<< keyPattern - << " keyCount: " << keyCount << " numSplits: " << splitKeys.size() - << " lookedAt: " << currCount << " took " << timer.millis() << "ms" - << endl; - } - - // Warning: we are sending back an array of keys but are currently limited to - // 4MB work of 'result' size. This should be okay for now. + result.append("timeMillis", timer.millis()); + } - result.append( "timeMillis", timer.millis() ); - } + result.append("splitKeys", splitKeys); + return true; + } +} cmdSplitVector; + +class SplitChunkCommand : public Command { +public: + SplitChunkCommand() : Command("splitChunk") {} + virtual void help(stringstream& help) const { + help << "internal command usage only\n" + "example:\n" + " { splitChunk:\"db.foo\" , keyPattern: {a:1} , min : {a:100} , max: {a:200} { " + "splitKeys : [ {a:150} , ... ]}"; + } - result.append( "splitKeys" , splitKeys ); - return true; + virtual bool slaveOk() const { + return false; + } + virtual bool adminOnly() const { + return true; + } + virtual bool isWriteCommandForConfigServer() const { + return false; + } + virtual Status checkAuthForCommand(ClientBasic* client, + const std::string& dbname, + const BSONObj& cmdObj) { + if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource( + ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))), + ActionType::splitChunk)) { + return Status(ErrorCodes::Unauthorized, "Unauthorized"); + } + return Status::OK(); + } + virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const { + return parseNsFullyQualified(dbname, cmdObj); + } + bool run(OperationContext* txn, + const string& dbname, + BSONObj& cmdObj, + int, + string& errmsg, + BSONObjBuilder& result, + bool fromRepl) { + // + // 1. check whether parameters passed to splitChunk are sound + // + + const string ns = parseNs(dbname, cmdObj); + if (ns.empty()) { + errmsg = "need to specify namespace in command"; + return false; + } + const BSONObj keyPattern = cmdObj["keyPattern"].Obj(); + if (keyPattern.isEmpty()) { + errmsg = "need to specify the key pattern the collection is sharded over"; + return false; } - } cmdSplitVector; - - class SplitChunkCommand : public Command { - public: - SplitChunkCommand() : Command( "splitChunk" ) {} - virtual void help( stringstream& help ) const { - help << - "internal command usage only\n" - "example:\n" - " { splitChunk:\"db.foo\" , keyPattern: {a:1} , min : {a:100} , max: {a:200} { splitKeys : [ {a:150} , ... ]}"; - } - - virtual bool slaveOk() const { return false; } - virtual bool adminOnly() const { return true; } - virtual bool isWriteCommandForConfigServer() const { return false; } - virtual Status checkAuthForCommand(ClientBasic* client, - const std::string& dbname, - const BSONObj& cmdObj) { - if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource( - ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))), - ActionType::splitChunk)) { - return Status(ErrorCodes::Unauthorized, "Unauthorized"); - } - return Status::OK(); + + const BSONObj min = cmdObj["min"].Obj(); + if (min.isEmpty()) { + errmsg = "need to specify the min key for the chunk"; + return false; } - virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const { - return parseNsFullyQualified(dbname, cmdObj); + + const BSONObj max = cmdObj["max"].Obj(); + if (max.isEmpty()) { + errmsg = "need to specify the max key for the chunk"; + return false; } - bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { - // - // 1. check whether parameters passed to splitChunk are sound - // + const string shardName = cmdObj["from"].str(); + if (shardName.empty()) { + errmsg = "need specify server to split chunk at"; + return false; + } - const string ns = parseNs(dbname, cmdObj); - if ( ns.empty() ) { - errmsg = "need to specify namespace in command"; - return false; - } + const BSONObj splitKeysElem = cmdObj["splitKeys"].Obj(); + if (splitKeysElem.isEmpty()) { + errmsg = "need to provide the split points to chunk over"; + return false; + } + vector<BSONObj> splitKeys; + BSONObjIterator it(splitKeysElem); + while (it.more()) { + splitKeys.push_back(it.next().Obj().getOwned()); + } - const BSONObj keyPattern = cmdObj["keyPattern"].Obj(); - if ( keyPattern.isEmpty() ) { - errmsg = "need to specify the key pattern the collection is sharded over"; - return false; - } + const BSONElement shardId = cmdObj["shardId"]; + if (shardId.eoo()) { + errmsg = "need to provide shardId"; + return false; + } - const BSONObj min = cmdObj["min"].Obj(); - if ( min.isEmpty() ) { - errmsg = "need to specify the min key for the chunk"; - return false; - } + // + // Get sharding state up-to-date + // - const BSONObj max = cmdObj["max"].Obj(); - if ( max.isEmpty() ) { - errmsg = "need to specify the max key for the chunk"; + // This could be the first call that enables sharding - make sure we initialize the + // sharding state for this shard. + if (!shardingState.enabled()) { + if (cmdObj["configdb"].type() != String) { + errmsg = "sharding not enabled"; + warning() << errmsg << endl; return false; } + string configdb = cmdObj["configdb"].String(); + ShardingState::initialize(configdb); + } - const string shardName = cmdObj["from"].str(); - if ( shardName.empty() ) { - errmsg = "need specify server to split chunk at"; - return false; - } + // Initialize our current shard name in the shard state if needed + shardingState.gotShardName(shardName); - const BSONObj splitKeysElem = cmdObj["splitKeys"].Obj(); - if ( splitKeysElem.isEmpty() ) { - errmsg = "need to provide the split points to chunk over"; - return false; - } - vector<BSONObj> splitKeys; - BSONObjIterator it( splitKeysElem ); - while ( it.more() ) { - splitKeys.push_back( it.next().Obj().getOwned() ); - } + ConnectionString configLoc = + ConnectionString::parse(shardingState.getConfigServer(), errmsg); + if (!configLoc.isValid()) { + warning() << errmsg; + return false; + } - const BSONElement shardId = cmdObj["shardId"]; - if ( shardId.eoo() ) { - errmsg = "need to provide shardId"; - return false; - } + log() << "received splitChunk request: " << cmdObj; - // - // Get sharding state up-to-date - // + // + // 2. lock the collection's metadata and get highest version for the current shard + // - // This could be the first call that enables sharding - make sure we initialize the - // sharding state for this shard. - if ( ! shardingState.enabled() ) { - if ( cmdObj["configdb"].type() != String ) { - errmsg = "sharding not enabled"; - warning() << errmsg << endl; - return false; - } - string configdb = cmdObj["configdb"].String(); - ShardingState::initialize(configdb); - } + ScopedDistributedLock collLock(configLoc, ns); + collLock.setLockMessage(str::stream() << "splitting chunk [" << minKey << ", " << maxKey + << ") in " << ns); - // Initialize our current shard name in the shard state if needed - shardingState.gotShardName(shardName); + Status acquisitionStatus = collLock.tryAcquire(); + if (!acquisitionStatus.isOK()) { + errmsg = str::stream() << "could not acquire collection lock for " << ns + << " to split chunk [" << minKey << "," << maxKey << ")" + << causedBy(acquisitionStatus); - ConnectionString configLoc = ConnectionString::parse(shardingState.getConfigServer(), - errmsg); - if (!configLoc.isValid()) { - warning() << errmsg; - return false; - } + warning() << errmsg << endl; + return false; + } - log() << "received splitChunk request: " << cmdObj; + // Always check our version remotely + ChunkVersion shardVersion; + Status refreshStatus = shardingState.refreshMetadataNow(txn, ns, &shardVersion); - // - // 2. lock the collection's metadata and get highest version for the current shard - // + if (!refreshStatus.isOK()) { + errmsg = str::stream() << "splitChunk cannot split chunk " + << "[" << minKey << "," << maxKey << ")" + << causedBy(refreshStatus.reason()); + + warning() << errmsg; + return false; + } - ScopedDistributedLock collLock(configLoc, ns); - collLock.setLockMessage(str::stream() << "splitting chunk [" << minKey << ", " << maxKey - << ") in " << ns); + if (shardVersion.majorVersion() == 0) { + // It makes no sense to split if our version is zero and we have no chunks + errmsg = str::stream() << "splitChunk cannot split chunk " + << "[" << minKey << "," << maxKey << ")" + << " with zero shard version"; - Status acquisitionStatus = collLock.tryAcquire(); - if (!acquisitionStatus.isOK()) { - errmsg = str::stream() << "could not acquire collection lock for " << ns - << " to split chunk [" << minKey << "," << maxKey << ")" - << causedBy(acquisitionStatus); + warning() << errmsg; + return false; + } - warning() << errmsg << endl; + // From mongos >= v3.0. + BSONElement epochElem(cmdObj["epoch"]); + if (epochElem.type() == jstOID) { + OID cmdEpoch = epochElem.OID(); + + if (cmdEpoch != shardVersion.epoch()) { + errmsg = str::stream() << "splitChunk cannot split chunk " + << "[" << minKey << "," << maxKey << "), " + << "collection may have been dropped. " + << "current epoch: " << shardVersion.epoch() + << ", cmd epoch: " << cmdEpoch; + warning() << errmsg; return false; } + } - // Always check our version remotely - ChunkVersion shardVersion; - Status refreshStatus = shardingState.refreshMetadataNow(txn, ns, &shardVersion); + // Get collection metadata + const CollectionMetadataPtr collMetadata(shardingState.getCollectionMetadata(ns)); + // With nonzero shard version, we must have metadata + invariant(NULL != collMetadata); - if (!refreshStatus.isOK()) { + ChunkVersion collVersion = collMetadata->getCollVersion(); + // With nonzero shard version, we must have a coll version >= our shard version + invariant(collVersion >= shardVersion); - errmsg = str::stream() << "splitChunk cannot split chunk " << "[" << minKey << "," - << maxKey << ")" << causedBy(refreshStatus.reason()); + ChunkType origChunk; + if (!collMetadata->getNextChunk(min, &origChunk) || origChunk.getMin().woCompare(min) || + origChunk.getMax().woCompare(max)) { + // Our boundaries are different from those passed in + errmsg = str::stream() << "splitChunk cannot find chunk " + << "[" << minKey << "," << maxKey << ")" + << " to split, the chunk boundaries may be stale"; - warning() << errmsg; - return false; - } + warning() << errmsg; + return false; + } - if (shardVersion.majorVersion() == 0) { + log() << "splitChunk accepted at version " << shardVersion; - // It makes no sense to split if our version is zero and we have no chunks - errmsg = str::stream() << "splitChunk cannot split chunk " << "[" << minKey << "," - << maxKey << ")" << " with zero shard version"; + // + // 3. create the batch of updates to metadata ( the new chunks ) to be applied via 'applyOps' command + // - warning() << errmsg; - return false; - } + BSONObjBuilder logDetail; + appendShortVersion(logDetail.subobjStart("before"), origChunk); + LOG(1) << "before split on " << origChunk << endl; + OwnedPointerVector<ChunkType> newChunks; - // From mongos >= v3.0. - BSONElement epochElem(cmdObj["epoch"]); - if (epochElem.type() == jstOID) { - OID cmdEpoch = epochElem.OID(); - - if (cmdEpoch != shardVersion.epoch()) { - errmsg = str::stream() << "splitChunk cannot split chunk " - << "[" << minKey << "," - << maxKey << "), " - << "collection may have been dropped. " - << "current epoch: " << shardVersion.epoch() - << ", cmd epoch: " << cmdEpoch; - warning() << errmsg; - return false; - } - } + ChunkVersion nextChunkVersion = collVersion; + BSONObj startKey = min; + splitKeys.push_back(max); // makes it easier to have 'max' in the next loop. remove later. - // Get collection metadata - const CollectionMetadataPtr collMetadata(shardingState.getCollectionMetadata(ns)); - // With nonzero shard version, we must have metadata - invariant(NULL != collMetadata); + BSONObjBuilder cmdBuilder; + BSONArrayBuilder updates(cmdBuilder.subarrayStart("applyOps")); - ChunkVersion collVersion = collMetadata->getCollVersion(); - // With nonzero shard version, we must have a coll version >= our shard version - invariant(collVersion >= shardVersion); + for (vector<BSONObj>::const_iterator it = splitKeys.begin(); it != splitKeys.end(); ++it) { + BSONObj endKey = *it; - ChunkType origChunk; - if (!collMetadata->getNextChunk(min, &origChunk) - || origChunk.getMin().woCompare(min) || origChunk.getMax().woCompare(max)) { + if (endKey.woCompare(startKey) == 0) { + errmsg = str::stream() << "split on the lower bound of chunk " + << "[" << min << ", " << max << ")" + << " is not allowed"; - // Our boundaries are different from those passed in - errmsg = str::stream() << "splitChunk cannot find chunk " - << "[" << minKey << "," << maxKey << ")" - << " to split, the chunk boundaries may be stale"; + warning() << errmsg << endl; + return false; + } + // Make sure splits don't create too-big shard keys + Status status = ShardKeyPattern::checkShardKeySize(endKey); + if (!status.isOK()) { + errmsg = status.reason(); warning() << errmsg; return false; } - log() << "splitChunk accepted at version " << shardVersion; - - // - // 3. create the batch of updates to metadata ( the new chunks ) to be applied via 'applyOps' command - // + // splits only update the 'minor' portion of version + nextChunkVersion.incMinor(); - BSONObjBuilder logDetail; - appendShortVersion(logDetail.subobjStart("before"), origChunk); - LOG(1) << "before split on " << origChunk << endl; - OwnedPointerVector<ChunkType> newChunks; + // build an update operation against the chunks collection of the config database with + // upsert true + BSONObjBuilder op; + op.append("op", "u"); + op.appendBool("b", true); + op.append("ns", ChunkType::ConfigNS); - ChunkVersion nextChunkVersion = collVersion; - BSONObj startKey = min; - splitKeys.push_back( max ); // makes it easier to have 'max' in the next loop. remove later. + // add the modified (new) chunk information as the update object + BSONObjBuilder n(op.subobjStart("o")); + n.append(ChunkType::name(), Chunk::genID(ns, startKey)); + nextChunkVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod()); + n.append(ChunkType::ns(), ns); + n.append(ChunkType::min(), startKey); + n.append(ChunkType::max(), endKey); + n.append(ChunkType::shard(), shardName); + n.done(); - BSONObjBuilder cmdBuilder; - BSONArrayBuilder updates( cmdBuilder.subarrayStart( "applyOps" ) ); + // add the chunk's _id as the query part of the update statement + BSONObjBuilder q(op.subobjStart("o2")); + q.append(ChunkType::name(), Chunk::genID(ns, startKey)); + q.done(); - for ( vector<BSONObj>::const_iterator it = splitKeys.begin(); it != splitKeys.end(); ++it ) { - BSONObj endKey = *it; + updates.append(op.obj()); - if ( endKey.woCompare( startKey ) == 0) { - errmsg = str::stream() << "split on the lower bound of chunk " - << "[" << min << ", " << max << ")" - << " is not allowed"; + // remember this chunk info for logging later + auto_ptr<ChunkType> chunk(new ChunkType()); + chunk->setMin(startKey); + chunk->setMax(endKey); + chunk->setVersion(nextChunkVersion); - warning() << errmsg << endl; - return false; - } + newChunks.push_back(chunk.release()); - // Make sure splits don't create too-big shard keys - Status status = ShardKeyPattern::checkShardKeySize(endKey); - if (!status.isOK()) { - errmsg = status.reason(); - warning() << errmsg; - return false; - } - - // splits only update the 'minor' portion of version - nextChunkVersion.incMinor(); - - // build an update operation against the chunks collection of the config database with - // upsert true - BSONObjBuilder op; - op.append( "op" , "u" ); - op.appendBool( "b" , true ); - op.append( "ns" , ChunkType::ConfigNS ); - - // add the modified (new) chunk information as the update object - BSONObjBuilder n( op.subobjStart( "o" ) ); - n.append(ChunkType::name(), Chunk::genID(ns, startKey)); - nextChunkVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod()); - n.append(ChunkType::ns(), ns); - n.append(ChunkType::min(), startKey); - n.append(ChunkType::max(), endKey); - n.append(ChunkType::shard(), shardName); - n.done(); - - // add the chunk's _id as the query part of the update statement - BSONObjBuilder q( op.subobjStart( "o2" ) ); - q.append(ChunkType::name(), Chunk::genID(ns, startKey)); - q.done(); - - updates.append( op.obj() ); - - // remember this chunk info for logging later - auto_ptr<ChunkType> chunk(new ChunkType()); - chunk->setMin(startKey); - chunk->setMax(endKey); - chunk->setVersion(nextChunkVersion); - - newChunks.push_back(chunk.release()); - - startKey = endKey; - } + startKey = endKey; + } - splitKeys.pop_back(); // 'max' was used as sentinel + splitKeys.pop_back(); // 'max' was used as sentinel - updates.done(); + updates.done(); + { + BSONArrayBuilder preCond(cmdBuilder.subarrayStart("preCondition")); + BSONObjBuilder b; + b.append("ns", ChunkType::ConfigNS); + b.append("q", + BSON("query" << BSON(ChunkType::ns(ns)) << "orderby" + << BSON(ChunkType::DEPRECATED_lastmod() << -1))); { - BSONArrayBuilder preCond( cmdBuilder.subarrayStart( "preCondition" ) ); - BSONObjBuilder b; - b.append("ns", ChunkType::ConfigNS); - b.append("q", BSON("query" << BSON(ChunkType::ns(ns)) << - "orderby" << BSON(ChunkType::DEPRECATED_lastmod() << -1))); - { - BSONObjBuilder bb( b.subobjStart( "res" ) ); - // TODO: For backwards compatibility, we can't yet require an epoch here - bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), collVersion.toLong()); - bb.done(); - } - preCond.append( b.obj() ); - preCond.done(); + BSONObjBuilder bb(b.subobjStart("res")); + // TODO: For backwards compatibility, we can't yet require an epoch here + bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), collVersion.toLong()); + bb.done(); } + preCond.append(b.obj()); + preCond.done(); + } - // - // 4. apply the batch of updates to remote and local metadata - // + // + // 4. apply the batch of updates to remote and local metadata + // - BSONObj cmd = cmdBuilder.obj(); + BSONObj cmd = cmdBuilder.obj(); - LOG(1) << "splitChunk update: " << cmd << endl; + LOG(1) << "splitChunk update: " << cmd << endl; - bool ok; - BSONObj cmdResult; - { - ScopedDbConnection conn(shardingState.getConfigServer(), 30); - ok = conn->runCommand( "config" , cmd , cmdResult ); - conn.done(); - } + bool ok; + BSONObj cmdResult; + { + ScopedDbConnection conn(shardingState.getConfigServer(), 30); + ok = conn->runCommand("config", cmd, cmdResult); + conn.done(); + } - if ( ! ok ) { - stringstream ss; - ss << "saving chunks failed. cmd: " << cmd << " result: " << cmdResult; - error() << ss.str() << endl; - msgasserted( 13593 , ss.str() ); - } + if (!ok) { + stringstream ss; + ss << "saving chunks failed. cmd: " << cmd << " result: " << cmdResult; + error() << ss.str() << endl; + msgasserted(13593, ss.str()); + } - // - // Install chunk metadata with knowledge about newly split chunks in this shard's state - // - - { - ScopedTransaction transaction(txn, MODE_IX); - Lock::DBLock writeLk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_IX); - Lock::CollectionLock collLock(txn->lockState(), ns, MODE_X); + // + // Install chunk metadata with knowledge about newly split chunks in this shard's state + // - // NOTE: The newShardVersion resulting from this split is higher than any - // other chunk version, so it's also implicitly the newCollVersion - ChunkVersion newShardVersion = collVersion; + { + ScopedTransaction transaction(txn, MODE_IX); + Lock::DBLock writeLk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_IX); + Lock::CollectionLock collLock(txn->lockState(), ns, MODE_X); - // Increment the minor version once, shardingState.splitChunk increments once - // per split point (resulting in the correct final shard/collection version) - // TODO: Revisit this interface, it's a bit clunky - newShardVersion.incMinor(); + // NOTE: The newShardVersion resulting from this split is higher than any + // other chunk version, so it's also implicitly the newCollVersion + ChunkVersion newShardVersion = collVersion; - shardingState.splitChunk(txn, ns, min, max, splitKeys, newShardVersion); - } + // Increment the minor version once, shardingState.splitChunk increments once + // per split point (resulting in the correct final shard/collection version) + // TODO: Revisit this interface, it's a bit clunky + newShardVersion.incMinor(); - // - // 5. logChanges - // + shardingState.splitChunk(txn, ns, min, max, splitKeys, newShardVersion); + } - // single splits are logged different than multisplits - if ( newChunks.size() == 2 ) { - appendShortVersion(logDetail.subobjStart("left"), *newChunks[0]); - appendShortVersion(logDetail.subobjStart("right"), *newChunks[1]); - configServer.logChange( "split" , ns , logDetail.obj() ); + // + // 5. logChanges + // + + // single splits are logged different than multisplits + if (newChunks.size() == 2) { + appendShortVersion(logDetail.subobjStart("left"), *newChunks[0]); + appendShortVersion(logDetail.subobjStart("right"), *newChunks[1]); + configServer.logChange("split", ns, logDetail.obj()); + } else { + BSONObj beforeDetailObj = logDetail.obj(); + BSONObj firstDetailObj = beforeDetailObj.getOwned(); + const int newChunksSize = newChunks.size(); + + for (int i = 0; i < newChunksSize; i++) { + BSONObjBuilder chunkDetail; + chunkDetail.appendElements(beforeDetailObj); + chunkDetail.append("number", i + 1); + chunkDetail.append("of", newChunksSize); + appendShortVersion(chunkDetail.subobjStart("chunk"), *newChunks[i]); + configServer.logChange("multi-split", ns, chunkDetail.obj()); } - else { - BSONObj beforeDetailObj = logDetail.obj(); - BSONObj firstDetailObj = beforeDetailObj.getOwned(); - const int newChunksSize = newChunks.size(); - - for ( int i=0; i < newChunksSize; i++ ) { - BSONObjBuilder chunkDetail; - chunkDetail.appendElements( beforeDetailObj ); - chunkDetail.append( "number", i+1 ); - chunkDetail.append( "of" , newChunksSize ); - appendShortVersion(chunkDetail.subobjStart("chunk"), *newChunks[i]); - configServer.logChange( "multi-split" , ns , chunkDetail.obj() ); - } - } - - dassert(newChunks.size() > 1); + } - { - AutoGetCollectionForRead ctx(txn, ns); - Collection* collection = ctx.getCollection(); - if (!collection) { - warning() << "will not perform top-chunk checking since " << ns - << " does not exist after splitting"; - return true; - } + dassert(newChunks.size() > 1); - // Allow multiKey based on the invariant that shard keys must be - // single-valued. Therefore, any multi-key index prefixed by shard - // key cannot be multikey over the shard key fields. - IndexDescriptor *idx = - collection->getIndexCatalog()->findIndexByPrefix(txn, keyPattern, false); + { + AutoGetCollectionForRead ctx(txn, ns); + Collection* collection = ctx.getCollection(); + if (!collection) { + warning() << "will not perform top-chunk checking since " << ns + << " does not exist after splitting"; + return true; + } - if (idx == NULL) { - return true; - } + // Allow multiKey based on the invariant that shard keys must be + // single-valued. Therefore, any multi-key index prefixed by shard + // key cannot be multikey over the shard key fields. + IndexDescriptor* idx = + collection->getIndexCatalog()->findIndexByPrefix(txn, keyPattern, false); - const ChunkType* backChunk = newChunks.vector().back(); - const ChunkType* frontChunk = newChunks.vector().front(); + if (idx == NULL) { + return true; + } - if (checkIfSingleDoc(txn, collection, idx, backChunk)) { - result.append("shouldMigrate", - BSON("min" << backChunk->getMin() - << "max" << backChunk->getMax())); - } - else if (checkIfSingleDoc(txn, collection, idx, frontChunk)) { - result.append("shouldMigrate", - BSON("min" << frontChunk->getMin() - << "max" << frontChunk->getMax())); + const ChunkType* backChunk = newChunks.vector().back(); + const ChunkType* frontChunk = newChunks.vector().front(); - } + if (checkIfSingleDoc(txn, collection, idx, backChunk)) { + result.append("shouldMigrate", + BSON("min" << backChunk->getMin() << "max" << backChunk->getMax())); + } else if (checkIfSingleDoc(txn, collection, idx, frontChunk)) { + result.append("shouldMigrate", + BSON("min" << frontChunk->getMin() << "max" << frontChunk->getMax())); } - - return true; } - private: + return true; + } - /** - * Append min, max and version information from chunk to the buffer. - */ - static void appendShortVersion(BufBuilder& b, const ChunkType& chunk) { - BSONObjBuilder bb(b); - bb.append(ChunkType::min(), chunk.getMin()); - bb.append(ChunkType::max(), chunk.getMax()); - if (chunk.isVersionSet()) - chunk.getVersion().addToBSON(bb, ChunkType::DEPRECATED_lastmod()); - bb.done(); - } +private: + /** + * Append min, max and version information from chunk to the buffer. + */ + static void appendShortVersion(BufBuilder& b, const ChunkType& chunk) { + BSONObjBuilder bb(b); + bb.append(ChunkType::min(), chunk.getMin()); + bb.append(ChunkType::max(), chunk.getMax()); + if (chunk.isVersionSet()) + chunk.getVersion().addToBSON(bb, ChunkType::DEPRECATED_lastmod()); + bb.done(); + } - static bool checkIfSingleDoc(OperationContext* txn, - Collection* collection, - const IndexDescriptor* idx, - const ChunkType* chunk) { - KeyPattern kp(idx->keyPattern()); - BSONObj newmin = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMin(), false)); - BSONObj newmax = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMax(), true)); + static bool checkIfSingleDoc(OperationContext* txn, + Collection* collection, + const IndexDescriptor* idx, + const ChunkType* chunk) { + KeyPattern kp(idx->keyPattern()); + BSONObj newmin = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMin(), false)); + BSONObj newmax = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMax(), true)); - auto_ptr<PlanExecutor> exec( - InternalPlanner::indexScan(txn, collection, idx, newmin, newmax, false)); + auto_ptr<PlanExecutor> exec( + InternalPlanner::indexScan(txn, collection, idx, newmin, newmax, false)); - // check if exactly one document found - if (PlanExecutor::ADVANCED == exec->getNext(NULL, NULL)) { - if (PlanExecutor::IS_EOF == exec->getNext(NULL, NULL)) { - return true; - } + // check if exactly one document found + if (PlanExecutor::ADVANCED == exec->getNext(NULL, NULL)) { + if (PlanExecutor::IS_EOF == exec->getNext(NULL, NULL)) { + return true; } - - return false; } - } cmdSplitChunk; + return false; + } + +} cmdSplitChunk; } // namespace mongo |