summaryrefslogtreecommitdiff
path: root/s/d_split.cpp
diff options
context:
space:
mode:
author: Alberto Lerner <alerner@10gen.com> 2010-09-16 18:21:59 -0400
committer: Alberto Lerner <alerner@10gen.com> 2010-09-16 18:21:59 -0400
commit: d55c4af16108788e056ce2cd17360426d7820626 (patch)
tree: 0794561c5e3fbf09503f6edc7dcd9a26c39b3397 /s/d_split.cpp
parent: bb9e90d97abacdc6d9e7c4b03e510434563057ca (diff)
download: mongo-d55c4af16108788e056ce2cd17360426d7820626.tar.gz
SERVER-1807 quicker auto-split if in a jumbo chunk case
Diffstat (limited to 's/d_split.cpp')
-rw-r--r-- s/d_split.cpp | 33
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/s/d_split.cpp b/s/d_split.cpp
index 8665101d636..c2e6c1c418b 100644
--- a/s/d_split.cpp
+++ b/s/d_split.cpp
@@ -127,12 +127,14 @@ namespace mongo {
"Internal command.\n"
"example: { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, maxChunkSize:200 }\n"
"maxChunkSize unit in MBs\n"
+ "May optionally specify 'maxSplitPoints' and 'maxChunkObjects' to avoid traversing the whole chunk\n"
"NOTE: This command may take a while to run";
}
bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){
const char* ns = jsobj.getStringField( "splitVector" );
BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
+ // If min and max are not provided use the "minKey" and "maxKey" for the sharding key pattern.
BSONObj min = jsobj.getObjectField( "min" );
BSONObj max = jsobj.getObjectField( "max" );
if ( min.isEmpty() && max.isEmpty() ){
@@ -167,10 +169,22 @@ namespace mongo {
return false;
}
}
-
+
+ long long maxSplitPoints = 0;
+ BSONElement maxSplitPointsElem = jsobj[ "maxSplitPoints" ];
+ if ( maxSplitPointsElem.isNumber() ){
+ maxSplitPoints = maxSplitPointsElem.numberLong();
+ }
+
+ long long maxChunkObjects = 0;
+ BSONElement MaxChunkObjectsElem = jsobj[ "maxChunkObjects" ];
+ if ( MaxChunkObjectsElem.isNumber() ){
+ maxChunkObjects = MaxChunkObjectsElem.numberLong();
+ }
+
+ // Get the size estimate for this namespace
Client::Context ctx( ns );
NamespaceDetails *d = nsdetails( ns );
-
if ( ! d ){
errmsg = "ns not found";
return false;
@@ -182,10 +196,10 @@ namespace mongo {
return false;
}
- // If there's not enough data for more than one chunk, no point continuing.
const long long recCount = d->nrecords;
const long long dataSize = d->datasize;
+ // If there's not enough data for more than one chunk, no point continuing.
if ( dataSize < maxChunkSize || recCount == 0 ) {
vector<BSONObj> emptyVector;
result.append( "splitKeys" , emptyVector );
@@ -193,15 +207,19 @@ namespace mongo {
}
// We'll use the average object size and number of objects to find approximately how many keys
- // each chunk should have. We'll split at half the maxChunkSize.
+ // each chunk should have. We'll split at half the maxChunkSize or maxChunkObjects, if
+ // provided.
const long long avgRecSize = dataSize / recCount;
long long keyCount = maxChunkSize / (2 * avgRecSize);
+ if ( maxChunkObjects && ( maxChunkObjects < keyCount ) )
+ keyCount = maxChunkObjects;
// We traverse the index and add the keyCount-th key to the result vector. If that key
// appeared in the vector before, we omit it. The assumption here is that all the
// instances of a key value live in the same chunk.
Timer timer;
long long currCount = 0;
+ long long numChunks = 0;
vector<BSONObj> splitKeys;
BSONObj currKey;
@@ -216,9 +234,16 @@ namespace mongo {
currKey = c->currKey();
splitKeys.push_back( bc->prettyKey( currKey ) );
currCount = 0;
+ numChunks++;
+ log(4) << "picked a split key: " << currKey << endl;
}
}
cc->advance();
+
+ // Stop if we have enough split points.
+ if ( maxSplitPoints && ( numChunks >= maxSplitPoints ) ){
+ break;
+ }
if ( ! cc->yieldSometimes() ){
// we were near the end and got pushed to the end