diff options
author | Alberto Lerner <alerner@10gen.com> | 2010-09-16 18:21:59 -0400 |
---|---|---|
committer | Alberto Lerner <alerner@10gen.com> | 2010-09-16 18:21:59 -0400 |
commit | d55c4af16108788e056ce2cd17360426d7820626 (patch) | |
tree | 0794561c5e3fbf09503f6edc7dcd9a26c39b3397 /s/d_split.cpp | |
parent | bb9e90d97abacdc6d9e7c4b03e510434563057ca (diff) | |
download | mongo-d55c4af16108788e056ce2cd17360426d7820626.tar.gz |
SERVER-1807 quicker auto-split if in a jumbo chunk case
Diffstat (limited to 's/d_split.cpp')
-rw-r--r-- | s/d_split.cpp | 33 |
1 files changed, 29 insertions, 4 deletions
diff --git a/s/d_split.cpp b/s/d_split.cpp index 8665101d636..c2e6c1c418b 100644 --- a/s/d_split.cpp +++ b/s/d_split.cpp @@ -127,12 +127,14 @@ namespace mongo { "Internal command.\n" "example: { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, maxChunkSize:200 }\n" "maxChunkSize unit in MBs\n" + "May optionally specify 'maxSplitPoints' and 'maxChunkObjects' to avoid traversing the whole chunk\n" "NOTE: This command may take a while to run"; } bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ const char* ns = jsobj.getStringField( "splitVector" ); BSONObj keyPattern = jsobj.getObjectField( "keyPattern" ); + // If min and max are not provided use the "minKey" and "maxKey" for the sharding key pattern. BSONObj min = jsobj.getObjectField( "min" ); BSONObj max = jsobj.getObjectField( "max" ); if ( min.isEmpty() && max.isEmpty() ){ @@ -167,10 +169,22 @@ namespace mongo { return false; } } - + + long long maxSplitPoints = 0; + BSONElement maxSplitPointsElem = jsobj[ "maxSplitPoints" ]; + if ( maxSplitPointsElem.isNumber() ){ + maxSplitPoints = maxSplitPointsElem.numberLong(); + } + + long long maxChunkObjects = 0; + BSONElement MaxChunkObjectsElem = jsobj[ "maxChunkObjects" ]; + if ( MaxChunkObjectsElem.isNumber() ){ + maxChunkObjects = MaxChunkObjectsElem.numberLong(); + } + + // Get the size estimate for this namespace Client::Context ctx( ns ); NamespaceDetails *d = nsdetails( ns ); - if ( ! d ){ errmsg = "ns not found"; return false; @@ -182,10 +196,10 @@ namespace mongo { return false; } - // If there's not enough data for more than one chunk, no point continuing. const long long recCount = d->nrecords; const long long dataSize = d->datasize; + // If there's not enough data for more than one chunk, no point continuing. if ( dataSize < maxChunkSize || recCount == 0 ) { vector<BSONObj> emptyVector; result.append( "splitKeys" , emptyVector ); @@ -193,15 +207,19 @@ namespace mongo { } // We'll use the average object size and number of object to find approximately how many keys - // each chunk should have. We'll split at half the maxChunkSize. + // each chunk should have. We'll split at half the maxChunkSize or maxChunkObjects, if + // provided. const long long avgRecSize = dataSize / recCount; long long keyCount = maxChunkSize / (2 * avgRecSize); + if ( maxChunkObjects && ( maxChunkObjects < keyCount ) ) + keyCount = maxChunkObjects; // We traverse the index and add the keyCount-th key to the result vector. If that key // appeared in the vector before, we omit it. The assumption here is that all the // instances of a key value live in the same chunk. Timer timer; long long currCount = 0; + long long numChunks = 0; vector<BSONObj> splitKeys; BSONObj currKey; @@ -216,9 +234,16 @@ namespace mongo { currKey = c->currKey(); splitKeys.push_back( bc->prettyKey( currKey ) ); currCount = 0; + numChunks++; + log(4) << "picked a split key: " << currKey << endl; } } cc->advance(); + + // Stop if we have enough split points. + if ( maxSplitPoints && ( numChunks >= maxSplitPoints ) ){ + break; + } if ( ! cc->yieldSometimes() ){ // we were near and and got pushed to the end |