summaryrefslogtreecommitdiff
path: root/src/mongo/s/d_split.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/s/d_split.cpp')
-rw-r--r--src/mongo/s/d_split.cpp1453
1 files changed, 744 insertions, 709 deletions
diff --git a/src/mongo/s/d_split.cpp b/src/mongo/s/d_split.cpp
index 1cb2504ab8c..ba1022f0513 100644
--- a/src/mongo/s/d_split.cpp
+++ b/src/mongo/s/d_split.cpp
@@ -50,7 +50,7 @@
#include "mongo/db/instance.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/query/internal_plans.h"
-#include "mongo/s/chunk.h" // for static genID only
+#include "mongo/s/chunk.h" // for static genID only
#include "mongo/s/chunk_version.h"
#include "mongo/s/config.h"
#include "mongo/s/d_state.h"
@@ -62,861 +62,896 @@
namespace mongo {
- using std::auto_ptr;
- using std::endl;
- using std::ostringstream;
- using std::set;
- using std::string;
- using std::stringstream;
- using std::vector;
-
- class CmdMedianKey : public Command {
- public:
- CmdMedianKey() : Command( "medianKey" ) {}
- virtual bool slaveOk() const { return true; }
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual void help( stringstream &help ) const {
- help << "Deprecated internal command. Use splitVector command instead. \n";
- }
- // No auth required as this command no longer does anything.
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {}
- bool run(OperationContext* txn, const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
- errmsg = "medianKey command no longer supported. Calling this indicates mismatch between mongo versions.";
+using std::auto_ptr;
+using std::endl;
+using std::ostringstream;
+using std::set;
+using std::string;
+using std::stringstream;
+using std::vector;
+
+class CmdMedianKey : public Command {
+public:
+ CmdMedianKey() : Command("medianKey") {}
+ virtual bool slaveOk() const {
+ return true;
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual void help(stringstream& help) const {
+ help << "Deprecated internal command. Use splitVector command instead. \n";
+ }
+ // No auth required as this command no longer does anything.
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {}
+ bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& jsobj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ errmsg =
+ "medianKey command no longer supported. Calling this indicates mismatch between mongo "
+ "versions.";
+ return false;
+ }
+} cmdMedianKey;
+
+class CheckShardingIndex : public Command {
+public:
+ CheckShardingIndex() : Command("checkShardingIndex", false) {}
+ virtual bool slaveOk() const {
+ return false;
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual void help(stringstream& help) const {
+ help << "Internal command.\n";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::find);
+ out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions));
+ }
+
+ std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const {
+ return parseNsFullyQualified(dbname, cmdObj);
+ }
+
+ bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& jsobj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ std::string ns = parseNs(dbname, jsobj);
+ BSONObj keyPattern = jsobj.getObjectField("keyPattern");
+
+ if (keyPattern.isEmpty()) {
+ errmsg = "no key pattern found in checkShardingindex";
return false;
}
- } cmdMedianKey;
- class CheckShardingIndex : public Command {
- public:
- CheckShardingIndex() : Command( "checkShardingIndex" , false ) {}
- virtual bool slaveOk() const { return false; }
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual void help( stringstream &help ) const {
- help << "Internal command.\n";
+ if (keyPattern.nFields() == 1 && str::equals("_id", keyPattern.firstElementFieldName())) {
+ result.appendBool("idskip", true);
+ return true;
+ }
+
+ BSONObj min = jsobj.getObjectField("min");
+ BSONObj max = jsobj.getObjectField("max");
+ if (min.isEmpty() != max.isEmpty()) {
+ errmsg = "either provide both min and max or leave both empty";
+ return false;
}
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::find);
- out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions));
+
+ AutoGetCollectionForRead ctx(txn, ns);
+ Collection* collection = ctx.getCollection();
+ if (!collection) {
+ errmsg = "ns not found";
+ return false;
}
- std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const {
- return parseNsFullyQualified(dbname, cmdObj);
+ IndexDescriptor* idx = collection->getIndexCatalog()->findIndexByPrefix(
+ txn, keyPattern, true); /* require single key */
+ if (idx == NULL) {
+ errmsg = "couldn't find valid index for shard key";
+ return false;
+ }
+ // extend min to get (min, MinKey, MinKey, ....)
+ KeyPattern kp(idx->keyPattern());
+ min = Helpers::toKeyFormat(kp.extendRangeBound(min, false));
+ if (max.isEmpty()) {
+ // if max not specified, make it (MaxKey, Maxkey, MaxKey...)
+ max = Helpers::toKeyFormat(kp.extendRangeBound(max, true));
+ } else {
+ // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive
+ max = Helpers::toKeyFormat(kp.extendRangeBound(max, false));
}
- bool run(OperationContext* txn, const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ auto_ptr<PlanExecutor> exec(InternalPlanner::indexScan(
+ txn, collection, idx, min, max, false, InternalPlanner::FORWARD));
+ exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);
+
+ // Find the 'missingField' value used to represent a missing document field in a key of
+ // this index.
+ // NOTE A local copy of 'missingField' is made because indices may be
+ // invalidated during a db lock yield.
+ BSONObj missingFieldObj = IndexLegacy::getMissingField(txn, collection, idx->infoObj());
+ BSONElement missingField = missingFieldObj.firstElement();
+
+ // for now, the only check is that all shard keys are filled
+ // a 'missingField' valued index key is ok if the field is present in the document,
+ // TODO if $exist for nulls were picking the index, it could be used instead efficiently
+ int keyPatternLength = keyPattern.nFields();
+
+ RecordId loc;
+ BSONObj currKey;
+ while (PlanExecutor::ADVANCED == exec->getNext(&currKey, &loc)) {
+ // check that current key contains non missing elements for all fields in keyPattern
+ BSONObjIterator i(currKey);
+ for (int k = 0; k < keyPatternLength; k++) {
+ if (!i.more()) {
+ errmsg = str::stream() << "index key " << currKey << " too short for pattern "
+ << keyPattern;
+ return false;
+ }
+ BSONElement currKeyElt = i.next();
- std::string ns = parseNs(dbname, jsobj);
- BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
+ if (!currKeyElt.eoo() && !currKeyElt.valuesEqual(missingField))
+ continue;
- if ( keyPattern.isEmpty() ) {
- errmsg = "no key pattern found in checkShardingindex";
- return false;
- }
+ // This is a fetch, but it's OK. The underlying code won't throw a page fault
+ // exception.
+ BSONObj obj = collection->docFor(txn, loc).value();
+ BSONObjIterator j(keyPattern);
+ BSONElement real;
+ for (int x = 0; x <= k; x++)
+ real = j.next();
- if ( keyPattern.nFields() == 1 && str::equals( "_id" , keyPattern.firstElementFieldName() ) ) {
- result.appendBool( "idskip" , true );
- return true;
- }
+ real = obj.getFieldDotted(real.fieldName());
+
+ if (real.type())
+ continue;
+
+ ostringstream os;
+ os << "found missing value in key " << currKey << " for doc: "
+ << (obj.hasField("_id") ? obj.toString() : obj["_id"].toString());
+ log() << "checkShardingIndex for '" << ns << "' failed: " << os.str() << endl;
- BSONObj min = jsobj.getObjectField( "min" );
- BSONObj max = jsobj.getObjectField( "max" );
- if ( min.isEmpty() != max.isEmpty() ) {
- errmsg = "either provide both min and max or leave both empty";
+ errmsg = os.str();
return false;
}
+ }
+
+ return true;
+ }
+} cmdCheckShardingIndex;
+
+BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) {
+ return key.replaceFieldNames(keyPattern).clientReadable();
+}
+class SplitVector : public Command {
+public:
+ SplitVector() : Command("splitVector", false) {}
+ virtual bool slaveOk() const {
+ return false;
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual void help(stringstream& help) const {
+ help << "Internal command.\n"
+ "examples:\n"
+ " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, "
+ "maxChunkSize:200 }\n"
+ " maxChunkSize unit in MBs\n"
+ " May optionally specify 'maxSplitPoints' and 'maxChunkObjects' to avoid "
+ "traversing the whole chunk\n"
+ " \n"
+ " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, "
+ "force: true }\n"
+ " 'force' will produce one split point even if data is small; defaults to false\n"
+ "NOTE: This command may take a while to run";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource(
+ ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))),
+ ActionType::splitVector)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
+ }
+ return Status::OK();
+ }
+ virtual std::string parseNs(const string& dbname, const BSONObj& cmdObj) const {
+ return parseNsFullyQualified(dbname, cmdObj);
+ }
+ bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& jsobj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ //
+ // 1.a We'll parse the parameters in two steps. First, make sure the we can use the split index to get
+ // a good approximation of the size of the chunk -- without needing to access the actual data.
+ //
+
+ const std::string ns = parseNs(dbname, jsobj);
+ BSONObj keyPattern = jsobj.getObjectField("keyPattern");
+
+ if (keyPattern.isEmpty()) {
+ errmsg = "no key pattern found in splitVector";
+ return false;
+ }
+
+ // If min and max are not provided use the "minKey" and "maxKey" for the sharding key pattern.
+ BSONObj min = jsobj.getObjectField("min");
+ BSONObj max = jsobj.getObjectField("max");
+ if (min.isEmpty() != max.isEmpty()) {
+ errmsg = "either provide both min and max or leave both empty";
+ return false;
+ }
+
+ long long maxSplitPoints = 0;
+ BSONElement maxSplitPointsElem = jsobj["maxSplitPoints"];
+ if (maxSplitPointsElem.isNumber()) {
+ maxSplitPoints = maxSplitPointsElem.numberLong();
+ }
+
+ long long maxChunkObjects = Chunk::MaxObjectPerChunk;
+ BSONElement MaxChunkObjectsElem = jsobj["maxChunkObjects"];
+ if (MaxChunkObjectsElem.isNumber()) {
+ maxChunkObjects = MaxChunkObjectsElem.numberLong();
+ }
+
+ vector<BSONObj> splitKeys;
+
+ {
+ // Get the size estimate for this namespace
AutoGetCollectionForRead ctx(txn, ns);
Collection* collection = ctx.getCollection();
- if ( !collection ) {
+ if (!collection) {
errmsg = "ns not found";
return false;
}
- IndexDescriptor *idx =
- collection->getIndexCatalog()->findIndexByPrefix( txn,
- keyPattern,
- true ); /* require single key */
- if ( idx == NULL ) {
- errmsg = "couldn't find valid index for shard key";
+ // Allow multiKey based on the invariant that shard keys must be single-valued.
+ // Therefore, any multi-key index prefixed by shard key cannot be multikey over
+ // the shard key fields.
+ IndexDescriptor* idx =
+ collection->getIndexCatalog()->findIndexByPrefix(txn, keyPattern, false);
+ if (idx == NULL) {
+ errmsg = (string) "couldn't find index over splitting key " +
+ keyPattern.clientReadable().toString();
return false;
}
// extend min to get (min, MinKey, MinKey, ....)
- KeyPattern kp( idx->keyPattern() );
- min = Helpers::toKeyFormat( kp.extendRangeBound( min, false ) );
- if ( max.isEmpty() ) {
+ KeyPattern kp(idx->keyPattern());
+ min = Helpers::toKeyFormat(kp.extendRangeBound(min, false));
+ if (max.isEmpty()) {
// if max not specified, make it (MaxKey, Maxkey, MaxKey...)
- max = Helpers::toKeyFormat( kp.extendRangeBound( max, true ) );
+ max = Helpers::toKeyFormat(kp.extendRangeBound(max, true));
} else {
// otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive
- max = Helpers::toKeyFormat( kp.extendRangeBound( max, false ) );
+ max = Helpers::toKeyFormat(kp.extendRangeBound(max, false));
}
- auto_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn, collection, idx,
- min, max, false,
- InternalPlanner::FORWARD));
- exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);
+ const long long recCount = collection->numRecords(txn);
+ const long long dataSize = collection->dataSize(txn);
- // Find the 'missingField' value used to represent a missing document field in a key of
- // this index.
- // NOTE A local copy of 'missingField' is made because indices may be
- // invalidated during a db lock yield.
- BSONObj missingFieldObj = IndexLegacy::getMissingField(txn, collection, idx->infoObj());
- BSONElement missingField = missingFieldObj.firstElement();
-
- // for now, the only check is that all shard keys are filled
- // a 'missingField' valued index key is ok if the field is present in the document,
- // TODO if $exist for nulls were picking the index, it could be used instead efficiently
- int keyPatternLength = keyPattern.nFields();
-
- RecordId loc;
- BSONObj currKey;
- while (PlanExecutor::ADVANCED == exec->getNext(&currKey, &loc)) {
- //check that current key contains non missing elements for all fields in keyPattern
- BSONObjIterator i( currKey );
- for( int k = 0; k < keyPatternLength ; k++ ) {
- if( ! i.more() ) {
- errmsg = str::stream() << "index key " << currKey
- << " too short for pattern " << keyPattern;
- return false;
+ //
+ // 1.b Now that we have the size estimate, go over the remaining parameters and apply any maximum size
+ // restrictions specified there.
+ //
+
+ // 'force'-ing a split is equivalent to having maxChunkSize be the size of the current chunk, i.e., the
+ // logic below will split that chunk in half
+ long long maxChunkSize = 0;
+ bool forceMedianSplit = false;
+ {
+ BSONElement maxSizeElem = jsobj["maxChunkSize"];
+ BSONElement forceElem = jsobj["force"];
+
+ if (forceElem.trueValue()) {
+ forceMedianSplit = true;
+ // This chunk size is effectively ignored if force is true
+ maxChunkSize = dataSize;
+
+ } else if (maxSizeElem.isNumber()) {
+ maxChunkSize = maxSizeElem.numberLong() * 1 << 20;
+
+ } else {
+ maxSizeElem = jsobj["maxChunkSizeBytes"];
+ if (maxSizeElem.isNumber()) {
+ maxChunkSize = maxSizeElem.numberLong();
}
- BSONElement currKeyElt = i.next();
-
- if ( !currKeyElt.eoo() && !currKeyElt.valuesEqual( missingField ) )
- continue;
-
- // This is a fetch, but it's OK. The underlying code won't throw a page fault
- // exception.
- BSONObj obj = collection->docFor(txn, loc).value();
- BSONObjIterator j( keyPattern );
- BSONElement real;
- for ( int x=0; x <= k; x++ )
- real = j.next();
-
- real = obj.getFieldDotted( real.fieldName() );
-
- if ( real.type() )
- continue;
-
- ostringstream os;
- os << "found missing value in key " << currKey << " for doc: "
- << ( obj.hasField( "_id" ) ? obj.toString() : obj["_id"].toString() );
- log() << "checkShardingIndex for '" << ns << "' failed: " << os.str() << endl;
-
- errmsg = os.str();
+ }
+
+ // We need a maximum size for the chunk, unless we're not actually capable of finding any
+ // split points.
+ if (maxChunkSize <= 0 && recCount != 0) {
+ errmsg =
+ "need to specify the desired max chunk size (maxChunkSize or "
+ "maxChunkSizeBytes)";
return false;
}
}
- return true;
- }
- } cmdCheckShardingIndex;
- BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) {
- return key.replaceFieldNames(keyPattern).clientReadable();
- }
+ // If there's not enough data for more than one chunk, no point continuing.
+ if (dataSize < maxChunkSize || recCount == 0) {
+ vector<BSONObj> emptyVector;
+ result.append("splitKeys", emptyVector);
+ return true;
+ }
- class SplitVector : public Command {
- public:
- SplitVector() : Command( "splitVector" , false ) {}
- virtual bool slaveOk() const { return false; }
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual void help( stringstream &help ) const {
- help <<
- "Internal command.\n"
- "examples:\n"
- " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, maxChunkSize:200 }\n"
- " maxChunkSize unit in MBs\n"
- " May optionally specify 'maxSplitPoints' and 'maxChunkObjects' to avoid traversing the whole chunk\n"
- " \n"
- " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, force: true }\n"
- " 'force' will produce one split point even if data is small; defaults to false\n"
- "NOTE: This command may take a while to run";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource(
- ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))),
- ActionType::splitVector)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
+ log() << "request split points lookup for chunk " << ns << " " << min << " -->> " << max
+ << endl;
+
+ // We'll use the average object size and number of object to find approximately how many keys
+ // each chunk should have. We'll split at half the maxChunkSize or maxChunkObjects, if
+ // provided.
+ const long long avgRecSize = dataSize / recCount;
+ long long keyCount = maxChunkSize / (2 * avgRecSize);
+ if (maxChunkObjects && (maxChunkObjects < keyCount)) {
+ log() << "limiting split vector to " << maxChunkObjects << " (from " << keyCount
+ << ") objects " << endl;
+ keyCount = maxChunkObjects;
}
- return Status::OK();
- }
- virtual std::string parseNs(const string& dbname, const BSONObj& cmdObj) const {
- return parseNsFullyQualified(dbname, cmdObj);
- }
- bool run(OperationContext* txn, const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
//
- // 1.a We'll parse the parameters in two steps. First, make sure the we can use the split index to get
- // a good approximation of the size of the chunk -- without needing to access the actual data.
+ // 2. Traverse the index and add the keyCount-th key to the result vector. If that key
+ // appeared in the vector before, we omit it. The invariant here is that all the
+ // instances of a given key value live in the same chunk.
//
- const std::string ns = parseNs(dbname, jsobj);
- BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
+ Timer timer;
+ long long currCount = 0;
+ long long numChunks = 0;
- if ( keyPattern.isEmpty() ) {
- errmsg = "no key pattern found in splitVector";
- return false;
- }
+ auto_ptr<PlanExecutor> exec(InternalPlanner::indexScan(
+ txn, collection, idx, min, max, false, InternalPlanner::FORWARD));
- // If min and max are not provided use the "minKey" and "maxKey" for the sharding key pattern.
- BSONObj min = jsobj.getObjectField( "min" );
- BSONObj max = jsobj.getObjectField( "max" );
- if ( min.isEmpty() != max.isEmpty() ){
- errmsg = "either provide both min and max or leave both empty";
+ BSONObj currKey;
+ PlanExecutor::ExecState state = exec->getNext(&currKey, NULL);
+ if (PlanExecutor::ADVANCED != state) {
+ errmsg = "can't open a cursor for splitting (desired range is possibly empty)";
return false;
}
- long long maxSplitPoints = 0;
- BSONElement maxSplitPointsElem = jsobj[ "maxSplitPoints" ];
- if ( maxSplitPointsElem.isNumber() ) {
- maxSplitPoints = maxSplitPointsElem.numberLong();
- }
-
- long long maxChunkObjects = Chunk::MaxObjectPerChunk;
- BSONElement MaxChunkObjectsElem = jsobj[ "maxChunkObjects" ];
- if ( MaxChunkObjectsElem.isNumber() ) {
- maxChunkObjects = MaxChunkObjectsElem.numberLong();
- }
+ // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, to be removed
+ // at the end. If a key appears more times than entries allowed on a chunk, we issue a warning and
+ // split on the following key.
+ set<BSONObj> tooFrequentKeys;
+ splitKeys.push_back(
+ prettyKey(idx->keyPattern(), currKey.getOwned()).extractFields(keyPattern));
- vector<BSONObj> splitKeys;
+ exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);
+ while (1) {
+ while (PlanExecutor::ADVANCED == state) {
+ currCount++;
+
+ if (currCount > keyCount && !forceMedianSplit) {
+ currKey = prettyKey(idx->keyPattern(), currKey.getOwned())
+ .extractFields(keyPattern);
+ // Do not use this split key if it is the same used in the previous split point.
+ if (currKey.woCompare(splitKeys.back()) == 0) {
+ tooFrequentKeys.insert(currKey.getOwned());
+ } else {
+ splitKeys.push_back(currKey.getOwned());
+ currCount = 0;
+ numChunks++;
+ LOG(4) << "picked a split key: " << currKey << endl;
+ }
+ }
- {
- // Get the size estimate for this namespace
- AutoGetCollectionForRead ctx(txn, ns);
- Collection* collection = ctx.getCollection();
- if ( !collection ) {
- errmsg = "ns not found";
- return false;
- }
+ // Stop if we have enough split points.
+ if (maxSplitPoints && (numChunks >= maxSplitPoints)) {
+ log() << "max number of requested split points reached (" << numChunks
+ << ") before the end of chunk " << ns << " " << min << " -->> " << max
+ << endl;
+ break;
+ }
- // Allow multiKey based on the invariant that shard keys must be single-valued.
- // Therefore, any multi-key index prefixed by shard key cannot be multikey over
- // the shard key fields.
- IndexDescriptor *idx =
- collection->getIndexCatalog()->findIndexByPrefix( txn,
- keyPattern,
- false );
- if ( idx == NULL ) {
- errmsg = (string)"couldn't find index over splitting key " +
- keyPattern.clientReadable().toString();
- return false;
- }
- // extend min to get (min, MinKey, MinKey, ....)
- KeyPattern kp( idx->keyPattern() );
- min = Helpers::toKeyFormat( kp.extendRangeBound ( min, false ) );
- if ( max.isEmpty() ) {
- // if max not specified, make it (MaxKey, Maxkey, MaxKey...)
- max = Helpers::toKeyFormat( kp.extendRangeBound( max, true ) );
- } else {
- // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive
- max = Helpers::toKeyFormat( kp.extendRangeBound( max, false ) );
+ state = exec->getNext(&currKey, NULL);
}
- const long long recCount = collection->numRecords(txn);
- const long long dataSize = collection->dataSize(txn);
+ if (!forceMedianSplit)
+ break;
//
- // 1.b Now that we have the size estimate, go over the remaining parameters and apply any maximum size
- // restrictions specified there.
+ // If we're forcing a split at the halfway point, then the first pass was just
+ // to count the keys, and we still need a second pass.
//
-
- // 'force'-ing a split is equivalent to having maxChunkSize be the size of the current chunk, i.e., the
- // logic below will split that chunk in half
- long long maxChunkSize = 0;
- bool forceMedianSplit = false;
- {
- BSONElement maxSizeElem = jsobj[ "maxChunkSize" ];
- BSONElement forceElem = jsobj[ "force" ];
-
- if ( forceElem.trueValue() ) {
- forceMedianSplit = true;
- // This chunk size is effectively ignored if force is true
- maxChunkSize = dataSize;
-
- }
- else if ( maxSizeElem.isNumber() ) {
- maxChunkSize = maxSizeElem.numberLong() * 1<<20;
-
- }
- else {
- maxSizeElem = jsobj["maxChunkSizeBytes"];
- if ( maxSizeElem.isNumber() ) {
- maxChunkSize = maxSizeElem.numberLong();
- }
- }
- // We need a maximum size for the chunk, unless we're not actually capable of finding any
- // split points.
- if ( maxChunkSize <= 0 && recCount != 0 ) {
- errmsg = "need to specify the desired max chunk size (maxChunkSize or maxChunkSizeBytes)";
- return false;
- }
- }
-
-
- // If there's not enough data for more than one chunk, no point continuing.
- if ( dataSize < maxChunkSize || recCount == 0 ) {
- vector<BSONObj> emptyVector;
- result.append( "splitKeys" , emptyVector );
- return true;
- }
-
- log() << "request split points lookup for chunk " << ns << " " << min << " -->> " << max << endl;
-
- // We'll use the average object size and number of object to find approximately how many keys
- // each chunk should have. We'll split at half the maxChunkSize or maxChunkObjects, if
- // provided.
- const long long avgRecSize = dataSize / recCount;
- long long keyCount = maxChunkSize / (2 * avgRecSize);
- if ( maxChunkObjects && ( maxChunkObjects < keyCount ) ) {
- log() << "limiting split vector to " << maxChunkObjects << " (from " << keyCount << ") objects " << endl;
- keyCount = maxChunkObjects;
- }
-
- //
- // 2. Traverse the index and add the keyCount-th key to the result vector. If that key
- // appeared in the vector before, we omit it. The invariant here is that all the
- // instances of a given key value live in the same chunk.
- //
-
- Timer timer;
- long long currCount = 0;
- long long numChunks = 0;
-
- auto_ptr<PlanExecutor> exec(
- InternalPlanner::indexScan(txn, collection, idx, min, max,
- false, InternalPlanner::FORWARD));
-
- BSONObj currKey;
- PlanExecutor::ExecState state = exec->getNext(&currKey, NULL);
- if (PlanExecutor::ADVANCED != state) {
- errmsg = "can't open a cursor for splitting (desired range is possibly empty)";
- return false;
- }
-
- // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, to be removed
- // at the end. If a key appears more times than entries allowed on a chunk, we issue a warning and
- // split on the following key.
- set<BSONObj> tooFrequentKeys;
- splitKeys.push_back(prettyKey(idx->keyPattern(), currKey.getOwned()).extractFields( keyPattern ) );
+ forceMedianSplit = false;
+ keyCount = currCount / 2;
+ currCount = 0;
+ log() << "splitVector doing another cycle because of force, keyCount now: "
+ << keyCount << endl;
+
+ exec.reset(InternalPlanner::indexScan(
+ txn, collection, idx, min, max, false, InternalPlanner::FORWARD));
exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);
- while ( 1 ) {
- while (PlanExecutor::ADVANCED == state) {
- currCount++;
-
- if ( currCount > keyCount && !forceMedianSplit ) {
- currKey = prettyKey(idx->keyPattern(), currKey.getOwned()).extractFields(keyPattern);
- // Do not use this split key if it is the same used in the previous split point.
- if ( currKey.woCompare( splitKeys.back() ) == 0 ) {
- tooFrequentKeys.insert( currKey.getOwned() );
- }
- else {
- splitKeys.push_back( currKey.getOwned() );
- currCount = 0;
- numChunks++;
- LOG(4) << "picked a split key: " << currKey << endl;
- }
- }
+ state = exec->getNext(&currKey, NULL);
+ }
- // Stop if we have enough split points.
- if ( maxSplitPoints && ( numChunks >= maxSplitPoints ) ) {
- log() << "max number of requested split points reached (" << numChunks
- << ") before the end of chunk " << ns << " " << min << " -->> " << max
- << endl;
- break;
- }
+ //
+ // 3. Format the result and issue any warnings about the data we gathered while traversing the
+ // index
+ //
- state = exec->getNext(&currKey, NULL);
- }
-
- if ( ! forceMedianSplit )
- break;
-
- //
- // If we're forcing a split at the halfway point, then the first pass was just
- // to count the keys, and we still need a second pass.
- //
+ // Warn for keys that are more numerous than maxChunkSize allows.
+ for (set<BSONObj>::const_iterator it = tooFrequentKeys.begin();
+ it != tooFrequentKeys.end();
+ ++it) {
+ warning() << "chunk is larger than " << maxChunkSize << " bytes because of key "
+ << prettyKey(idx->keyPattern(), *it) << endl;
+ }
- forceMedianSplit = false;
- keyCount = currCount / 2;
- currCount = 0;
- log() << "splitVector doing another cycle because of force, keyCount now: " << keyCount << endl;
+ // Remove the sentinel at the beginning before returning
+ splitKeys.erase(splitKeys.begin());
- exec.reset(InternalPlanner::indexScan(txn, collection, idx, min, max,
- false, InternalPlanner::FORWARD));
+ if (timer.millis() > serverGlobalParams.slowMS) {
+ warning() << "Finding the split vector for " << ns << " over " << keyPattern
+ << " keyCount: " << keyCount << " numSplits: " << splitKeys.size()
+ << " lookedAt: " << currCount << " took " << timer.millis() << "ms"
+ << endl;
+ }
- exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);
- state = exec->getNext(&currKey, NULL);
- }
+ // Warning: we are sending back an array of keys but are currently limited to
+ // 4MB work of 'result' size. This should be okay for now.
- //
- // 3. Format the result and issue any warnings about the data we gathered while traversing the
- // index
- //
-
- // Warn for keys that are more numerous than maxChunkSize allows.
- for ( set<BSONObj>::const_iterator it = tooFrequentKeys.begin(); it != tooFrequentKeys.end(); ++it ) {
- warning() << "chunk is larger than " << maxChunkSize
- << " bytes because of key " << prettyKey(idx->keyPattern(), *it ) << endl;
- }
-
- // Remove the sentinel at the beginning before returning
- splitKeys.erase( splitKeys.begin() );
-
- if (timer.millis() > serverGlobalParams.slowMS) {
- warning() << "Finding the split vector for " << ns << " over "<< keyPattern
- << " keyCount: " << keyCount << " numSplits: " << splitKeys.size()
- << " lookedAt: " << currCount << " took " << timer.millis() << "ms"
- << endl;
- }
-
- // Warning: we are sending back an array of keys but are currently limited to
- // 4MB work of 'result' size. This should be okay for now.
+ result.append("timeMillis", timer.millis());
+ }
- result.append( "timeMillis", timer.millis() );
- }
+ result.append("splitKeys", splitKeys);
+ return true;
+ }
+} cmdSplitVector;
+
+class SplitChunkCommand : public Command {
+public:
+ SplitChunkCommand() : Command("splitChunk") {}
+ virtual void help(stringstream& help) const {
+ help << "internal command usage only\n"
+ "example:\n"
+ " { splitChunk:\"db.foo\" , keyPattern: {a:1} , min : {a:100} , max: {a:200} { "
+ "splitKeys : [ {a:150} , ... ]}";
+ }
- result.append( "splitKeys" , splitKeys );
- return true;
+ virtual bool slaveOk() const {
+ return false;
+ }
+ virtual bool adminOnly() const {
+ return true;
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource(
+ ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))),
+ ActionType::splitChunk)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
+ }
+ return Status::OK();
+ }
+ virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const {
+ return parseNsFullyQualified(dbname, cmdObj);
+ }
+ bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ //
+ // 1. check whether parameters passed to splitChunk are sound
+ //
+
+ const string ns = parseNs(dbname, cmdObj);
+ if (ns.empty()) {
+ errmsg = "need to specify namespace in command";
+ return false;
+ }
+ const BSONObj keyPattern = cmdObj["keyPattern"].Obj();
+ if (keyPattern.isEmpty()) {
+ errmsg = "need to specify the key pattern the collection is sharded over";
+ return false;
}
- } cmdSplitVector;
-
- class SplitChunkCommand : public Command {
- public:
- SplitChunkCommand() : Command( "splitChunk" ) {}
- virtual void help( stringstream& help ) const {
- help <<
- "internal command usage only\n"
- "example:\n"
- " { splitChunk:\"db.foo\" , keyPattern: {a:1} , min : {a:100} , max: {a:200} { splitKeys : [ {a:150} , ... ]}";
- }
-
- virtual bool slaveOk() const { return false; }
- virtual bool adminOnly() const { return true; }
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource(
- ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))),
- ActionType::splitChunk)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
+
+ const BSONObj min = cmdObj["min"].Obj();
+ if (min.isEmpty()) {
+ errmsg = "need to specify the min key for the chunk";
+ return false;
}
- virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const {
- return parseNsFullyQualified(dbname, cmdObj);
+
+ const BSONObj max = cmdObj["max"].Obj();
+ if (max.isEmpty()) {
+ errmsg = "need to specify the max key for the chunk";
+ return false;
}
- bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
- //
- // 1. check whether parameters passed to splitChunk are sound
- //
+ const string shardName = cmdObj["from"].str();
+ if (shardName.empty()) {
+ errmsg = "need specify server to split chunk at";
+ return false;
+ }
- const string ns = parseNs(dbname, cmdObj);
- if ( ns.empty() ) {
- errmsg = "need to specify namespace in command";
- return false;
- }
+ const BSONObj splitKeysElem = cmdObj["splitKeys"].Obj();
+ if (splitKeysElem.isEmpty()) {
+ errmsg = "need to provide the split points to chunk over";
+ return false;
+ }
+ vector<BSONObj> splitKeys;
+ BSONObjIterator it(splitKeysElem);
+ while (it.more()) {
+ splitKeys.push_back(it.next().Obj().getOwned());
+ }
- const BSONObj keyPattern = cmdObj["keyPattern"].Obj();
- if ( keyPattern.isEmpty() ) {
- errmsg = "need to specify the key pattern the collection is sharded over";
- return false;
- }
+ const BSONElement shardId = cmdObj["shardId"];
+ if (shardId.eoo()) {
+ errmsg = "need to provide shardId";
+ return false;
+ }
- const BSONObj min = cmdObj["min"].Obj();
- if ( min.isEmpty() ) {
- errmsg = "need to specify the min key for the chunk";
- return false;
- }
+ //
+ // Get sharding state up-to-date
+ //
- const BSONObj max = cmdObj["max"].Obj();
- if ( max.isEmpty() ) {
- errmsg = "need to specify the max key for the chunk";
+ // This could be the first call that enables sharding - make sure we initialize the
+ // sharding state for this shard.
+ if (!shardingState.enabled()) {
+ if (cmdObj["configdb"].type() != String) {
+ errmsg = "sharding not enabled";
+ warning() << errmsg << endl;
return false;
}
+ string configdb = cmdObj["configdb"].String();
+ ShardingState::initialize(configdb);
+ }
- const string shardName = cmdObj["from"].str();
- if ( shardName.empty() ) {
- errmsg = "need specify server to split chunk at";
- return false;
- }
+ // Initialize our current shard name in the shard state if needed
+ shardingState.gotShardName(shardName);
- const BSONObj splitKeysElem = cmdObj["splitKeys"].Obj();
- if ( splitKeysElem.isEmpty() ) {
- errmsg = "need to provide the split points to chunk over";
- return false;
- }
- vector<BSONObj> splitKeys;
- BSONObjIterator it( splitKeysElem );
- while ( it.more() ) {
- splitKeys.push_back( it.next().Obj().getOwned() );
- }
+ ConnectionString configLoc =
+ ConnectionString::parse(shardingState.getConfigServer(), errmsg);
+ if (!configLoc.isValid()) {
+ warning() << errmsg;
+ return false;
+ }
- const BSONElement shardId = cmdObj["shardId"];
- if ( shardId.eoo() ) {
- errmsg = "need to provide shardId";
- return false;
- }
+ log() << "received splitChunk request: " << cmdObj;
- //
- // Get sharding state up-to-date
- //
+ //
+ // 2. lock the collection's metadata and get highest version for the current shard
+ //
- // This could be the first call that enables sharding - make sure we initialize the
- // sharding state for this shard.
- if ( ! shardingState.enabled() ) {
- if ( cmdObj["configdb"].type() != String ) {
- errmsg = "sharding not enabled";
- warning() << errmsg << endl;
- return false;
- }
- string configdb = cmdObj["configdb"].String();
- ShardingState::initialize(configdb);
- }
+ ScopedDistributedLock collLock(configLoc, ns);
+ collLock.setLockMessage(str::stream() << "splitting chunk [" << minKey << ", " << maxKey
+ << ") in " << ns);
- // Initialize our current shard name in the shard state if needed
- shardingState.gotShardName(shardName);
+ Status acquisitionStatus = collLock.tryAcquire();
+ if (!acquisitionStatus.isOK()) {
+ errmsg = str::stream() << "could not acquire collection lock for " << ns
+ << " to split chunk [" << minKey << "," << maxKey << ")"
+ << causedBy(acquisitionStatus);
- ConnectionString configLoc = ConnectionString::parse(shardingState.getConfigServer(),
- errmsg);
- if (!configLoc.isValid()) {
- warning() << errmsg;
- return false;
- }
+ warning() << errmsg << endl;
+ return false;
+ }
- log() << "received splitChunk request: " << cmdObj;
+ // Always check our version remotely
+ ChunkVersion shardVersion;
+ Status refreshStatus = shardingState.refreshMetadataNow(txn, ns, &shardVersion);
- //
- // 2. lock the collection's metadata and get highest version for the current shard
- //
+ if (!refreshStatus.isOK()) {
+ errmsg = str::stream() << "splitChunk cannot split chunk "
+ << "[" << minKey << "," << maxKey << ")"
+ << causedBy(refreshStatus.reason());
+
+ warning() << errmsg;
+ return false;
+ }
- ScopedDistributedLock collLock(configLoc, ns);
- collLock.setLockMessage(str::stream() << "splitting chunk [" << minKey << ", " << maxKey
- << ") in " << ns);
+ if (shardVersion.majorVersion() == 0) {
+ // It makes no sense to split if our version is zero and we have no chunks
+ errmsg = str::stream() << "splitChunk cannot split chunk "
+ << "[" << minKey << "," << maxKey << ")"
+ << " with zero shard version";
- Status acquisitionStatus = collLock.tryAcquire();
- if (!acquisitionStatus.isOK()) {
- errmsg = str::stream() << "could not acquire collection lock for " << ns
- << " to split chunk [" << minKey << "," << maxKey << ")"
- << causedBy(acquisitionStatus);
+ warning() << errmsg;
+ return false;
+ }
- warning() << errmsg << endl;
+ // From mongos >= v3.0.
+ BSONElement epochElem(cmdObj["epoch"]);
+ if (epochElem.type() == jstOID) {
+ OID cmdEpoch = epochElem.OID();
+
+ if (cmdEpoch != shardVersion.epoch()) {
+ errmsg = str::stream() << "splitChunk cannot split chunk "
+ << "[" << minKey << "," << maxKey << "), "
+ << "collection may have been dropped. "
+ << "current epoch: " << shardVersion.epoch()
+ << ", cmd epoch: " << cmdEpoch;
+ warning() << errmsg;
return false;
}
+ }
- // Always check our version remotely
- ChunkVersion shardVersion;
- Status refreshStatus = shardingState.refreshMetadataNow(txn, ns, &shardVersion);
+ // Get collection metadata
+ const CollectionMetadataPtr collMetadata(shardingState.getCollectionMetadata(ns));
+ // With nonzero shard version, we must have metadata
+ invariant(NULL != collMetadata);
- if (!refreshStatus.isOK()) {
+ ChunkVersion collVersion = collMetadata->getCollVersion();
+ // With nonzero shard version, we must have a coll version >= our shard version
+ invariant(collVersion >= shardVersion);
- errmsg = str::stream() << "splitChunk cannot split chunk " << "[" << minKey << ","
- << maxKey << ")" << causedBy(refreshStatus.reason());
+ ChunkType origChunk;
+ if (!collMetadata->getNextChunk(min, &origChunk) || origChunk.getMin().woCompare(min) ||
+ origChunk.getMax().woCompare(max)) {
+ // Our boundaries are different from those passed in
+ errmsg = str::stream() << "splitChunk cannot find chunk "
+ << "[" << minKey << "," << maxKey << ")"
+ << " to split, the chunk boundaries may be stale";
- warning() << errmsg;
- return false;
- }
+ warning() << errmsg;
+ return false;
+ }
- if (shardVersion.majorVersion() == 0) {
+ log() << "splitChunk accepted at version " << shardVersion;
- // It makes no sense to split if our version is zero and we have no chunks
- errmsg = str::stream() << "splitChunk cannot split chunk " << "[" << minKey << ","
- << maxKey << ")" << " with zero shard version";
+ //
+ // 3. create the batch of updates to metadata ( the new chunks ) to be applied via 'applyOps' command
+ //
- warning() << errmsg;
- return false;
- }
+ BSONObjBuilder logDetail;
+ appendShortVersion(logDetail.subobjStart("before"), origChunk);
+ LOG(1) << "before split on " << origChunk << endl;
+ OwnedPointerVector<ChunkType> newChunks;
- // From mongos >= v3.0.
- BSONElement epochElem(cmdObj["epoch"]);
- if (epochElem.type() == jstOID) {
- OID cmdEpoch = epochElem.OID();
-
- if (cmdEpoch != shardVersion.epoch()) {
- errmsg = str::stream() << "splitChunk cannot split chunk "
- << "[" << minKey << ","
- << maxKey << "), "
- << "collection may have been dropped. "
- << "current epoch: " << shardVersion.epoch()
- << ", cmd epoch: " << cmdEpoch;
- warning() << errmsg;
- return false;
- }
- }
+ ChunkVersion nextChunkVersion = collVersion;
+ BSONObj startKey = min;
+ splitKeys.push_back(max); // makes it easier to have 'max' in the next loop. remove later.
- // Get collection metadata
- const CollectionMetadataPtr collMetadata(shardingState.getCollectionMetadata(ns));
- // With nonzero shard version, we must have metadata
- invariant(NULL != collMetadata);
+ BSONObjBuilder cmdBuilder;
+ BSONArrayBuilder updates(cmdBuilder.subarrayStart("applyOps"));
- ChunkVersion collVersion = collMetadata->getCollVersion();
- // With nonzero shard version, we must have a coll version >= our shard version
- invariant(collVersion >= shardVersion);
+ for (vector<BSONObj>::const_iterator it = splitKeys.begin(); it != splitKeys.end(); ++it) {
+ BSONObj endKey = *it;
- ChunkType origChunk;
- if (!collMetadata->getNextChunk(min, &origChunk)
- || origChunk.getMin().woCompare(min) || origChunk.getMax().woCompare(max)) {
+ if (endKey.woCompare(startKey) == 0) {
+ errmsg = str::stream() << "split on the lower bound of chunk "
+ << "[" << min << ", " << max << ")"
+ << " is not allowed";
- // Our boundaries are different from those passed in
- errmsg = str::stream() << "splitChunk cannot find chunk "
- << "[" << minKey << "," << maxKey << ")"
- << " to split, the chunk boundaries may be stale";
+ warning() << errmsg << endl;
+ return false;
+ }
+ // Make sure splits don't create too-big shard keys
+ Status status = ShardKeyPattern::checkShardKeySize(endKey);
+ if (!status.isOK()) {
+ errmsg = status.reason();
warning() << errmsg;
return false;
}
- log() << "splitChunk accepted at version " << shardVersion;
-
- //
- // 3. create the batch of updates to metadata ( the new chunks ) to be applied via 'applyOps' command
- //
+ // splits only update the 'minor' portion of version
+ nextChunkVersion.incMinor();
- BSONObjBuilder logDetail;
- appendShortVersion(logDetail.subobjStart("before"), origChunk);
- LOG(1) << "before split on " << origChunk << endl;
- OwnedPointerVector<ChunkType> newChunks;
+ // build an update operation against the chunks collection of the config database with
+ // upsert true
+ BSONObjBuilder op;
+ op.append("op", "u");
+ op.appendBool("b", true);
+ op.append("ns", ChunkType::ConfigNS);
- ChunkVersion nextChunkVersion = collVersion;
- BSONObj startKey = min;
- splitKeys.push_back( max ); // makes it easier to have 'max' in the next loop. remove later.
+ // add the modified (new) chunk information as the update object
+ BSONObjBuilder n(op.subobjStart("o"));
+ n.append(ChunkType::name(), Chunk::genID(ns, startKey));
+ nextChunkVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
+ n.append(ChunkType::ns(), ns);
+ n.append(ChunkType::min(), startKey);
+ n.append(ChunkType::max(), endKey);
+ n.append(ChunkType::shard(), shardName);
+ n.done();
- BSONObjBuilder cmdBuilder;
- BSONArrayBuilder updates( cmdBuilder.subarrayStart( "applyOps" ) );
+ // add the chunk's _id as the query part of the update statement
+ BSONObjBuilder q(op.subobjStart("o2"));
+ q.append(ChunkType::name(), Chunk::genID(ns, startKey));
+ q.done();
- for ( vector<BSONObj>::const_iterator it = splitKeys.begin(); it != splitKeys.end(); ++it ) {
- BSONObj endKey = *it;
+ updates.append(op.obj());
- if ( endKey.woCompare( startKey ) == 0) {
- errmsg = str::stream() << "split on the lower bound of chunk "
- << "[" << min << ", " << max << ")"
- << " is not allowed";
+ // remember this chunk info for logging later
+ auto_ptr<ChunkType> chunk(new ChunkType());
+ chunk->setMin(startKey);
+ chunk->setMax(endKey);
+ chunk->setVersion(nextChunkVersion);
- warning() << errmsg << endl;
- return false;
- }
+ newChunks.push_back(chunk.release());
- // Make sure splits don't create too-big shard keys
- Status status = ShardKeyPattern::checkShardKeySize(endKey);
- if (!status.isOK()) {
- errmsg = status.reason();
- warning() << errmsg;
- return false;
- }
-
- // splits only update the 'minor' portion of version
- nextChunkVersion.incMinor();
-
- // build an update operation against the chunks collection of the config database with
- // upsert true
- BSONObjBuilder op;
- op.append( "op" , "u" );
- op.appendBool( "b" , true );
- op.append( "ns" , ChunkType::ConfigNS );
-
- // add the modified (new) chunk information as the update object
- BSONObjBuilder n( op.subobjStart( "o" ) );
- n.append(ChunkType::name(), Chunk::genID(ns, startKey));
- nextChunkVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
- n.append(ChunkType::ns(), ns);
- n.append(ChunkType::min(), startKey);
- n.append(ChunkType::max(), endKey);
- n.append(ChunkType::shard(), shardName);
- n.done();
-
- // add the chunk's _id as the query part of the update statement
- BSONObjBuilder q( op.subobjStart( "o2" ) );
- q.append(ChunkType::name(), Chunk::genID(ns, startKey));
- q.done();
-
- updates.append( op.obj() );
-
- // remember this chunk info for logging later
- auto_ptr<ChunkType> chunk(new ChunkType());
- chunk->setMin(startKey);
- chunk->setMax(endKey);
- chunk->setVersion(nextChunkVersion);
-
- newChunks.push_back(chunk.release());
-
- startKey = endKey;
- }
+ startKey = endKey;
+ }
- splitKeys.pop_back(); // 'max' was used as sentinel
+ splitKeys.pop_back(); // 'max' was used as sentinel
- updates.done();
+ updates.done();
+ {
+ BSONArrayBuilder preCond(cmdBuilder.subarrayStart("preCondition"));
+ BSONObjBuilder b;
+ b.append("ns", ChunkType::ConfigNS);
+ b.append("q",
+ BSON("query" << BSON(ChunkType::ns(ns)) << "orderby"
+ << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
{
- BSONArrayBuilder preCond( cmdBuilder.subarrayStart( "preCondition" ) );
- BSONObjBuilder b;
- b.append("ns", ChunkType::ConfigNS);
- b.append("q", BSON("query" << BSON(ChunkType::ns(ns)) <<
- "orderby" << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
- {
- BSONObjBuilder bb( b.subobjStart( "res" ) );
- // TODO: For backwards compatibility, we can't yet require an epoch here
- bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), collVersion.toLong());
- bb.done();
- }
- preCond.append( b.obj() );
- preCond.done();
+ BSONObjBuilder bb(b.subobjStart("res"));
+ // TODO: For backwards compatibility, we can't yet require an epoch here
+ bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), collVersion.toLong());
+ bb.done();
}
+ preCond.append(b.obj());
+ preCond.done();
+ }
- //
- // 4. apply the batch of updates to remote and local metadata
- //
+ //
+ // 4. apply the batch of updates to remote and local metadata
+ //
- BSONObj cmd = cmdBuilder.obj();
+ BSONObj cmd = cmdBuilder.obj();
- LOG(1) << "splitChunk update: " << cmd << endl;
+ LOG(1) << "splitChunk update: " << cmd << endl;
- bool ok;
- BSONObj cmdResult;
- {
- ScopedDbConnection conn(shardingState.getConfigServer(), 30);
- ok = conn->runCommand( "config" , cmd , cmdResult );
- conn.done();
- }
+ bool ok;
+ BSONObj cmdResult;
+ {
+ ScopedDbConnection conn(shardingState.getConfigServer(), 30);
+ ok = conn->runCommand("config", cmd, cmdResult);
+ conn.done();
+ }
- if ( ! ok ) {
- stringstream ss;
- ss << "saving chunks failed. cmd: " << cmd << " result: " << cmdResult;
- error() << ss.str() << endl;
- msgasserted( 13593 , ss.str() );
- }
+ if (!ok) {
+ stringstream ss;
+ ss << "saving chunks failed. cmd: " << cmd << " result: " << cmdResult;
+ error() << ss.str() << endl;
+ msgasserted(13593, ss.str());
+ }
- //
- // Install chunk metadata with knowledge about newly split chunks in this shard's state
- //
-
- {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock writeLk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_IX);
- Lock::CollectionLock collLock(txn->lockState(), ns, MODE_X);
+ //
+ // Install chunk metadata with knowledge about newly split chunks in this shard's state
+ //
- // NOTE: The newShardVersion resulting from this split is higher than any
- // other chunk version, so it's also implicitly the newCollVersion
- ChunkVersion newShardVersion = collVersion;
+ {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock writeLk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_IX);
+ Lock::CollectionLock collLock(txn->lockState(), ns, MODE_X);
- // Increment the minor version once, shardingState.splitChunk increments once
- // per split point (resulting in the correct final shard/collection version)
- // TODO: Revisit this interface, it's a bit clunky
- newShardVersion.incMinor();
+ // NOTE: The newShardVersion resulting from this split is higher than any
+ // other chunk version, so it's also implicitly the newCollVersion
+ ChunkVersion newShardVersion = collVersion;
- shardingState.splitChunk(txn, ns, min, max, splitKeys, newShardVersion);
- }
+ // Increment the minor version once, shardingState.splitChunk increments once
+ // per split point (resulting in the correct final shard/collection version)
+ // TODO: Revisit this interface, it's a bit clunky
+ newShardVersion.incMinor();
- //
- // 5. logChanges
- //
+ shardingState.splitChunk(txn, ns, min, max, splitKeys, newShardVersion);
+ }
- // single splits are logged different than multisplits
- if ( newChunks.size() == 2 ) {
- appendShortVersion(logDetail.subobjStart("left"), *newChunks[0]);
- appendShortVersion(logDetail.subobjStart("right"), *newChunks[1]);
- configServer.logChange( "split" , ns , logDetail.obj() );
+ //
+ // 5. logChanges
+ //
+
+ // single splits are logged different than multisplits
+ if (newChunks.size() == 2) {
+ appendShortVersion(logDetail.subobjStart("left"), *newChunks[0]);
+ appendShortVersion(logDetail.subobjStart("right"), *newChunks[1]);
+ configServer.logChange("split", ns, logDetail.obj());
+ } else {
+ BSONObj beforeDetailObj = logDetail.obj();
+ BSONObj firstDetailObj = beforeDetailObj.getOwned();
+ const int newChunksSize = newChunks.size();
+
+ for (int i = 0; i < newChunksSize; i++) {
+ BSONObjBuilder chunkDetail;
+ chunkDetail.appendElements(beforeDetailObj);
+ chunkDetail.append("number", i + 1);
+ chunkDetail.append("of", newChunksSize);
+ appendShortVersion(chunkDetail.subobjStart("chunk"), *newChunks[i]);
+ configServer.logChange("multi-split", ns, chunkDetail.obj());
}
- else {
- BSONObj beforeDetailObj = logDetail.obj();
- BSONObj firstDetailObj = beforeDetailObj.getOwned();
- const int newChunksSize = newChunks.size();
-
- for ( int i=0; i < newChunksSize; i++ ) {
- BSONObjBuilder chunkDetail;
- chunkDetail.appendElements( beforeDetailObj );
- chunkDetail.append( "number", i+1 );
- chunkDetail.append( "of" , newChunksSize );
- appendShortVersion(chunkDetail.subobjStart("chunk"), *newChunks[i]);
- configServer.logChange( "multi-split" , ns , chunkDetail.obj() );
- }
- }
-
- dassert(newChunks.size() > 1);
+ }
- {
- AutoGetCollectionForRead ctx(txn, ns);
- Collection* collection = ctx.getCollection();
- if (!collection) {
- warning() << "will not perform top-chunk checking since " << ns
- << " does not exist after splitting";
- return true;
- }
+ dassert(newChunks.size() > 1);
- // Allow multiKey based on the invariant that shard keys must be
- // single-valued. Therefore, any multi-key index prefixed by shard
- // key cannot be multikey over the shard key fields.
- IndexDescriptor *idx =
- collection->getIndexCatalog()->findIndexByPrefix(txn, keyPattern, false);
+ {
+ AutoGetCollectionForRead ctx(txn, ns);
+ Collection* collection = ctx.getCollection();
+ if (!collection) {
+ warning() << "will not perform top-chunk checking since " << ns
+ << " does not exist after splitting";
+ return true;
+ }
- if (idx == NULL) {
- return true;
- }
+ // Allow multiKey based on the invariant that shard keys must be
+ // single-valued. Therefore, any multi-key index prefixed by shard
+ // key cannot be multikey over the shard key fields.
+ IndexDescriptor* idx =
+ collection->getIndexCatalog()->findIndexByPrefix(txn, keyPattern, false);
- const ChunkType* backChunk = newChunks.vector().back();
- const ChunkType* frontChunk = newChunks.vector().front();
+ if (idx == NULL) {
+ return true;
+ }
- if (checkIfSingleDoc(txn, collection, idx, backChunk)) {
- result.append("shouldMigrate",
- BSON("min" << backChunk->getMin()
- << "max" << backChunk->getMax()));
- }
- else if (checkIfSingleDoc(txn, collection, idx, frontChunk)) {
- result.append("shouldMigrate",
- BSON("min" << frontChunk->getMin()
- << "max" << frontChunk->getMax()));
+ const ChunkType* backChunk = newChunks.vector().back();
+ const ChunkType* frontChunk = newChunks.vector().front();
- }
+ if (checkIfSingleDoc(txn, collection, idx, backChunk)) {
+ result.append("shouldMigrate",
+ BSON("min" << backChunk->getMin() << "max" << backChunk->getMax()));
+ } else if (checkIfSingleDoc(txn, collection, idx, frontChunk)) {
+ result.append("shouldMigrate",
+ BSON("min" << frontChunk->getMin() << "max" << frontChunk->getMax()));
}
-
- return true;
}
- private:
+ return true;
+ }
- /**
- * Append min, max and version information from chunk to the buffer.
- */
- static void appendShortVersion(BufBuilder& b, const ChunkType& chunk) {
- BSONObjBuilder bb(b);
- bb.append(ChunkType::min(), chunk.getMin());
- bb.append(ChunkType::max(), chunk.getMax());
- if (chunk.isVersionSet())
- chunk.getVersion().addToBSON(bb, ChunkType::DEPRECATED_lastmod());
- bb.done();
- }
+private:
+ /**
+ * Append min, max and version information from chunk to the buffer.
+ */
+ static void appendShortVersion(BufBuilder& b, const ChunkType& chunk) {
+ BSONObjBuilder bb(b);
+ bb.append(ChunkType::min(), chunk.getMin());
+ bb.append(ChunkType::max(), chunk.getMax());
+ if (chunk.isVersionSet())
+ chunk.getVersion().addToBSON(bb, ChunkType::DEPRECATED_lastmod());
+ bb.done();
+ }
- static bool checkIfSingleDoc(OperationContext* txn,
- Collection* collection,
- const IndexDescriptor* idx,
- const ChunkType* chunk) {
- KeyPattern kp(idx->keyPattern());
- BSONObj newmin = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMin(), false));
- BSONObj newmax = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMax(), true));
+ static bool checkIfSingleDoc(OperationContext* txn,
+ Collection* collection,
+ const IndexDescriptor* idx,
+ const ChunkType* chunk) {
+ KeyPattern kp(idx->keyPattern());
+ BSONObj newmin = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMin(), false));
+ BSONObj newmax = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMax(), true));
- auto_ptr<PlanExecutor> exec(
- InternalPlanner::indexScan(txn, collection, idx, newmin, newmax, false));
+ auto_ptr<PlanExecutor> exec(
+ InternalPlanner::indexScan(txn, collection, idx, newmin, newmax, false));
- // check if exactly one document found
- if (PlanExecutor::ADVANCED == exec->getNext(NULL, NULL)) {
- if (PlanExecutor::IS_EOF == exec->getNext(NULL, NULL)) {
- return true;
- }
+ // check if exactly one document found
+ if (PlanExecutor::ADVANCED == exec->getNext(NULL, NULL)) {
+ if (PlanExecutor::IS_EOF == exec->getNext(NULL, NULL)) {
+ return true;
}
-
- return false;
}
- } cmdSplitChunk;
+ return false;
+ }
+
+} cmdSplitChunk;
} // namespace mongo