diff options
author | gregs <greg@10gen.com> | 2011-09-14 16:23:56 -0400 |
---|---|---|
committer | gregs <greg@10gen.com> | 2011-11-10 11:27:32 -0500 |
commit | d42cf0df2b760e7a5ff04d39312c44715d580187 (patch) | |
tree | d5aeec5e08038fae8525210c63a7001b47cb086e | |
parent | 6cea95fbf5f2bff8809ee9a001ca680460d86a0f (diff) | |
download | mongo-d42cf0df2b760e7a5ff04d39312c44715d580187.tar.gz |
shell helpers for some sharding operations/status across a collection
-rw-r--r-- | shell/collection.js | 189 | ||||
-rw-r--r-- | shell/utils_sh.js | 16 |
2 files changed, 205 insertions, 0 deletions
// Reconstructed from a patch whose lines had been collapsed together.
//
// ---- Part 1 of 2 : additions to shell/collection.js ----
//
// The patch also adds these two entries to DBCollection.prototype.help (that function
// is only partially visible in the patch, so the added lines are recorded verbatim) :
//
//   print("\tdb." + shortName + ".getShardDistribution() - prints statistics about data distribution in the cluster");
//   print("\tdb." + shortName + ".getSplitKeysForChunks( <maxChunkSize> ) - calculates split points over all chunks and returns splitter function");


// Sharding additions

/*
Usage :

mongo <mongos>

> var collection = db.getMongo().getCollection("foo.bar")
> collection.getShardDistribution() // prints statistics related to the collection's data distribution

> collection.getSplitKeysForChunks() // generates split points for all chunks in the collection, based on the
                                     // default maxChunkSize or alternately a specified chunk size
> collection.getSplitKeysForChunks( 10 ) // Mb

> var splitter = collection.getSplitKeysForChunks() // by default, the chunks are not split, the keys are just
                                                    // found. A splitter function is returned which will actually
                                                    // do the splits.

> splitter() // ! Actually executes the splits on the cluster !

*/

/**
 * Prints, for every shard listed in this collection's stats, the shard's data size,
 * document count and chunk count, then the collection totals and each shard's
 * percentage share of data and documents.
 *
 * Reads config.shards and config.chunks through this collection's connection.
 * Prints a message and returns without further output if the collection is not sharded.
 */
DBCollection.prototype.getShardDistribution = function(){

    var stats = this.stats();

    if( ! stats.sharded ){
        print( "Collection " + this + " is not sharded." );
        return;
    }

    var config = this.getMongo().getDB("config");

    // Chunk ids for this collection all match this pattern; it does not change per shard.
    var chunkIdRE = sh._collRE( this );

    var numChunks = 0;

    for( var shard in stats.shards ){

        var shardDoc = config.shards.findOne({ _id : shard });

        print( "\nShard " + shard + " at " + shardDoc.host );

        var shardStats = stats.shards[ shard ];

        var chunks = config.chunks.find({ _id : chunkIdRE, shard : shard }).toArray();

        numChunks += chunks.length;

        // A shard may be listed in the stats without owning any chunk; avoid dividing by zero.
        var hasChunks = chunks.length > 0;
        var estChunkData = hasChunks ? shardStats.size / chunks.length : 0;
        var estChunkCount = hasChunks ? Math.floor( shardStats.count / chunks.length ) : 0;

        print( " data : " + sh._dataFormat( shardStats.size ) +
               " docs : " + shardStats.count +
               " chunks : " + chunks.length );
        print( " estimated data per chunk : " + sh._dataFormat( estChunkData ) );
        print( " estimated docs per chunk : " + estChunkCount );

    }

    print( "\nTotals" );
    print( " data : " + sh._dataFormat( stats.size ) +
           " docs : " + stats.count +
           " chunks : " + numChunks );

    for( var shard in stats.shards ){

        var shardStats = stats.shards[ shard ];

        // Percentages truncated to two decimals : ratio * 100 (percent) * 100 (two decimals) / 100.
        // An empty collection reports 0% everywhere rather than NaN.
        var estDataPercent = stats.size ? Math.floor( shardStats.size / stats.size * 10000 ) / 100 : 0;
        var estDocPercent = stats.count ? Math.floor( shardStats.count / stats.count * 10000 ) / 100 : 0;

        print( " Shard " + shard + " data : " + estDataPercent + "%, docs : " + estDocPercent + "%" +
               ", avg obj size : " + sh._dataFormat( shardStats.avgObjSize ) );
    }

    print( "\n" );

};


/**
 * Asks each shard (via the splitVector command) for split keys for every chunk of this
 * collection, prints a summary, and returns a function that performs the splits.
 * Nothing is split until the returned function is called.
 *
 * @param chunkSize  maximum chunk size in Mb; when omitted, the value of the "chunksize"
 *                   document in config.settings is used.
 * @return a zero-argument splitter function, or undefined when the collection is not
 *         sharded or when no chunk size was given and none is stored in config.settings.
 */
DBCollection.prototype.getSplitKeysForChunks = function( chunkSize ){

    var stats = this.stats();

    if( ! stats.sharded ){
        print( "Collection " + this + " is not sharded." );
        return;
    }

    var config = this.getMongo().getDB("config");

    if( ! chunkSize ){
        var chunkSizeDoc = config.settings.findOne({ _id : "chunksize" });
        if( ! chunkSizeDoc || ! chunkSizeDoc.value ){
            // Without a stored setting there is nothing sensible to fall back to.
            print( "No chunksize setting found in config.settings, please pass a chunk size (in Mb)." );
            return;
        }
        chunkSize = chunkSizeDoc.value;
        print( "Chunk size not set, using default of " + chunkSize + "Mb" );
    }
    else{
        print( "Using chunk size of " + chunkSize + "Mb" );
    }

    var ns = this + "";
    var chunkIdRE = sh._collRE( this );

    var shardDocs = config.shards.find().toArray();

    var allSplitPoints = {};
    var numSplits = 0;

    for( var i = 0; i < shardDocs.length; i++ ){

        var shardDoc = shardDocs[i];
        var shard = shardDoc._id;
        var host = shardDoc.host;

        // splitVector is sent directly to the shard, not through this collection's connection.
        var sconn = new Mongo( host );

        var chunks = config.chunks.find({ _id : chunkIdRE, shard : shard }).toArray();

        print( "\nGetting split points for chunks on shard " + shard + " at " + host );

        var splitPoints = [];

        for( var j = 0; j < chunks.length; j++ ){
            var chunk = chunks[j];
            var result = sconn.getDB("admin").runCommand({ splitVector : ns, min : chunk.min, max : chunk.max, maxChunkSize : chunkSize });
            if( ! result.ok ){
                // Report and carry on with the remaining chunks.
                print( " Had trouble getting split keys for chunk " + sh._pchunk( chunk ) + " :\n" );
                printjson( result );
            }
            else{
                splitPoints = splitPoints.concat( result.splitKeys );

                if( result.splitKeys.length > 0 )
                    print( " Added " + result.splitKeys.length + " split points for chunk " + sh._pchunk( chunk ) );
            }
        }

        print( "Total splits for shard " + shard + " : " + splitPoints.length );

        numSplits += splitPoints.length;
        allSplitPoints[ shard ] = splitPoints;

    }

    // Get most recent migration
    var migration = config.changelog.find({ what : /^move.*/ }).sort({ time : -1 }).limit( 1 ).toArray();
    if( migration.length == 0 )
        print( "\nNo migrations found in changelog." );
    else {
        migration = migration[0];
        print( "\nMost recent migration activity was on " + migration.ns + " at " + migration.time );
    }

    var admin = this.getMongo().getDB("admin");
    var coll = this;

    // Performs the splits collected above. The balancer is stopped for the duration.
    var splitFunction = function(){

        // Turn off the balancer, just to be safe
        print( "Turning off balancer..." );
        config.settings.update({ _id : "balancer" }, { $set : { stopped : true } }, true );
        print( "Sleeping for 30s to allow balancers to detect change. To be extra safe, check config.changelog" +
               " for recent migrations." );
        sleep( 30000 );

        try {
            for( var shardName in allSplitPoints ){
                var splitKeys = allSplitPoints[ shardName ];
                for( var k = 0; k < splitKeys.length; k++ ){
                    var splitKey = splitKeys[k];
                    print( "Splitting at " + tojson( splitKey ) );
                    printjson( admin.runCommand({ split : coll + "", middle : splitKey }) );
                }
            }
        }
        finally {
            // Always re-enable the balancer, even if a split threw part-way through.
            // NOTE(review): this enables the balancer even if it was already stopped before
            // the splitter ran -- confirm whether the previous state should be restored instead.
            print( "Turning the balancer back on." );
            config.settings.update({ _id : "balancer" }, { $set : { stopped : false } } );
            sleep( 1 );
        }
    };

    print( "\nGenerated " + numSplits + " split keys, run output function to perform splits.\n" +
           " ex : \n" +
           " > var splitter = <collection>.getSplitKeysForChunks()\n" +
           " > splitter() // Execute splits on cluster !\n" );

    return splitFunction;

};


// ---- Part 2 of 2 : additions to shell/utils_sh.js (inserted between sh._adminCommand and sh.help) ----

/**
 * Formats a byte count as a short human-readable string : whole bytes ("b") or
 * kilobytes ("kb"), or megabytes / gigabytes ("Mb" / "Gb") truncated to two decimals.
 */
sh._dataFormat = function( bytes ){
    if( bytes < 1024 ) return Math.floor( bytes ) + "b";
    if( bytes < 1024 * 1024 ) return Math.floor( bytes / 1024 ) + "kb";
    if( bytes < 1024 * 1024 * 1024 ) return Math.floor( ( Math.floor( bytes / 1024 ) / 1024 ) * 100 ) / 100 + "Mb";
    return Math.floor( ( Math.floor( bytes / ( 1024 * 1024 ) ) / 1024 ) * 100 ) / 100 + "Gb";
};

/**
 * Builds a RegExp matching strings that start with the collection's full name followed
 * by "-" (used above to select the collection's documents in config.chunks by _id).
 *
 * @param coll  a collection object or namespace string; it is converted with (coll + "").
 */
sh._collRE = function( coll ){
    // Escape every regex metacharacter, not just ".", so the name is matched literally.
    var escapedName = ( coll + "" ).replace( /[.*+?^${}()|[\]\\]/g, "\\$&" );
    return RegExp( "^" + escapedName + "-.*" );
};

/**
 * Renders a chunk's key range as "[<min> -> <max>]" for log output.
 */
sh._pchunk = function( chunk ){
    return "[" + tojson( chunk.min ) + " -> " + tojson( chunk.max ) + "]";
};