author     gregs <greg@10gen.com>  2011-09-14 16:23:56 -0400
committer  gregs <greg@10gen.com>  2011-11-10 11:27:32 -0500
commit     d42cf0df2b760e7a5ff04d39312c44715d580187 (patch)
tree       d5aeec5e08038fae8525210c63a7001b47cb086e
parent     6cea95fbf5f2bff8809ee9a001ca680460d86a0f (diff)
download   mongo-d42cf0df2b760e7a5ff04d39312c44715d580187.tar.gz
shell helpers for some sharding operations/status across a collection
-rw-r--r--  shell/collection.js | 189
-rw-r--r--  shell/utils_sh.js   |  16
2 files changed, 205 insertions, 0 deletions
diff --git a/shell/collection.js b/shell/collection.js
index 1e6fe032387..484e435fc8d 100644
--- a/shell/collection.js
+++ b/shell/collection.js
@@ -62,6 +62,8 @@ DBCollection.prototype.help = function () {
print("\tdb." + shortName + ".update(query, object[, upsert_bool, multi_bool])");
print("\tdb." + shortName + ".validate( <full> ) - SLOW");;
print("\tdb." + shortName + ".getShardVersion() - only for use with sharding");
+ print("\tdb." + shortName + ".getShardDistribution() - prints statistics about data distribution in the cluster");
+ print("\tdb." + shortName + ".getSplitKeysForChunks( <maxChunkSize> ) - calculates split points over all chunks and returns splitter function");
return __magicNoPrint;
}
@@ -654,3 +656,190 @@ DBCollection.autocomplete = function(obj){
}
return ret;
}
+
+
+// Sharding additions
+
+/*
+Usage :
+
+mongo <mongos>
+> load('path-to-file/shardingAdditions.js')
+Loading custom sharding extensions...
+true
+
+> var collection = db.getMongo().getCollection("foo.bar")
+> collection.getShardDistribution() // prints statistics related to the collection's data distribution
+
+> collection.getSplitKeysForChunks() // generates split points for all chunks in the collection, based on the
+                                     // default maxChunkSize or, alternatively, a specified chunk size
+> collection.getSplitKeysForChunks( 10 ) // chunk size in MB
+
+> var splitter = collection.getSplitKeysForChunks() // by default the chunks are not split ; the keys are just
+                                                    // found. A splitter function is returned which will actually
+                                                    // perform the splits.
+
+> splitter() // ! Actually executes the splits on the cluster !
+
+*/
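+
+/*
+Illustrative getShardDistribution() output on a hypothetical 2-shard cluster
+( shard names, hosts and all figures below are made up ; the format follows
+the print statements in the function ) :
+
+Shard shard0000 at localhost:30000
+  data : 117.15Mb docs : 245396 chunks : 5
+  estimated data per chunk : 23.43Mb
+  estimated docs per chunk : 49079
+
+Totals
+  data : 234.3Mb docs : 490792 chunks : 10
+  Shard shard0000 data : 50%, docs : 50%, avg obj size : 500b
+  Shard shard0001 data : 50%, docs : 50%, avg obj size : 500b
+*/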
+
+DBCollection.prototype.getShardDistribution = function(){
+
+ var stats = this.stats()
+
+ if( ! stats.sharded ){
+ print( "Collection " + this + " is not sharded." )
+ return
+ }
+
+ var config = this.getMongo().getDB("config")
+
+ var numChunks = 0
+
+ for( var shard in stats.shards ){
+
+ var shardDoc = config.shards.findOne({ _id : shard })
+
+ print( "\nShard " + shard + " at " + shardDoc.host )
+
+ var shardStats = stats.shards[ shard ]
+
+        var chunks = config.chunks.find({ _id : sh._collRE( this ), shard : shard }).toArray()
+
+ numChunks += chunks.length
+
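+        // Note : per-chunk figures are simple averages ( shard totals divided
+        // by the shard's chunk count ), not measured per-chunk sizes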
+ var estChunkData = shardStats.size / chunks.length
+ var estChunkCount = Math.floor( shardStats.count / chunks.length )
+
+ print( " data : " + sh._dataFormat( shardStats.size ) +
+ " docs : " + shardStats.count +
+ " chunks : " + chunks.length )
+ print( " estimated data per chunk : " + sh._dataFormat( estChunkData ) )
+ print( " estimated docs per chunk : " + estChunkCount )
+
+ }
+
+ print( "\nTotals" )
+ print( " data : " + sh._dataFormat( stats.size ) +
+ " docs : " + stats.count +
+ " chunks : " + numChunks )
+ for( var shard in stats.shards ){
+
+ var shardStats = stats.shards[ shard ]
+
+        var estDataPercent = Math.floor( shardStats.size / stats.size * 10000 ) / 100
+        var estDocPercent = Math.floor( shardStats.count / stats.count * 10000 ) / 100
+
+ print( " Shard " + shard + " data : " + estDataPercent + "%, docs : " + estDocPercent + "%" +
+ ", avg obj size : " + sh._dataFormat( stats.shards[ shard ].avgObjSize ) )
+ }
+
+ print( "\n" )
+
+}
+
+
+DBCollection.prototype.getSplitKeysForChunks = function( chunkSize ){
+
+ var stats = this.stats()
+
+ if( ! stats.sharded ){
+ print( "Collection " + this + " is not sharded." )
+ return
+ }
+
+ var config = this.getMongo().getDB("config")
+
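+    // No chunk size passed in : fall back to the cluster-wide default stored
+    // in config.settings ( value is in MB )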
+ if( ! chunkSize ){
+ chunkSize = config.settings.findOne({ _id : "chunksize" }).value
+ print( "Chunk size not set, using default of " + chunkSize + "Mb" )
+ }
+ else{
+ print( "Using chunk size of " + chunkSize + "Mb" )
+ }
+
+ var shardDocs = config.shards.find().toArray()
+
+ var allSplitPoints = {}
+ var numSplits = 0
+
+ for( var i = 0; i < shardDocs.length; i++ ){
+
+ var shardDoc = shardDocs[i]
+ var shard = shardDoc._id
+ var host = shardDoc.host
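+        // Connect directly to the shard, since splitVector is run against the
+        // shard that actually holds the chunk data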
+ var sconn = new Mongo( host )
+
+ var chunks = config.chunks.find({ _id : sh._collRE( this ), shard : shard }).toArray()
+
+ print( "\nGetting split points for chunks on shard " + shard + " at " + host )
+
+ var splitPoints = []
+
+ for( var j = 0; j < chunks.length; j++ ){
+ var chunk = chunks[j]
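+            // splitVector walks the shard key index and returns the keys that
+            // would split this range into pieces of at most maxChunkSize MB,
+            // without actually splitting anything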
+ var result = sconn.getDB("admin").runCommand({ splitVector : this + "", min : chunk.min, max : chunk.max, maxChunkSize : chunkSize })
+ if( ! result.ok ){
+ print( " Had trouble getting split keys for chunk " + sh._pchunk( chunk ) + " :\n" )
+ printjson( result )
+ }
+ else{
+ splitPoints = splitPoints.concat( result.splitKeys )
+
+ if( result.splitKeys.length > 0 )
+ print( " Added " + result.splitKeys.length + " split points for chunk " + sh._pchunk( chunk ) )
+ }
+ }
+
+ print( "Total splits for shard " + shard + " : " + splitPoints.length )
+
+ numSplits += splitPoints.length
+ allSplitPoints[ shard ] = splitPoints
+
+ }
+
+ // Get most recent migration
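+    // ( moveChunk operations appear in config.changelog with what : "moveChunk.*",
+    //   so this shows whether the balancer has been active recently )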
+ var migration = config.changelog.find({ what : /^move.*/ }).sort({ time : -1 }).limit( 1 ).toArray()
+ if( migration.length == 0 )
+ print( "\nNo migrations found in changelog." )
+ else {
+ migration = migration[0]
+ print( "\nMost recent migration activity was on " + migration.ns + " at " + migration.time )
+ }
+
+ var admin = this.getMongo().getDB("admin")
+ var coll = this
+ var splitFunction = function(){
+
+ // Turn off the balancer, just to be safe
+ print( "Turning off balancer..." )
+ config.settings.update({ _id : "balancer" }, { $set : { stopped : true } }, true )
+ print( "Sleeping for 30s to allow balancers to detect change. To be extra safe, check config.changelog" +
+ " for recent migrations." )
+ sleep( 30000 )
+
+        for( var shard in allSplitPoints ){
+ for( var i = 0; i < allSplitPoints[ shard ].length; i++ ){
+ var splitKey = allSplitPoints[ shard ][i]
+ print( "Splitting at " + tojson( splitKey ) )
+ printjson( admin.runCommand({ split : coll + "", middle : splitKey }) )
+ }
+ }
+
+ print( "Turning the balancer back on." )
+ config.settings.update({ _id : "balancer" }, { $set : { stopped : false } } )
+ sleep( 1 )
+ }
+
+ print( "\nGenerated " + numSplits + " split keys, run output function to perform splits.\n" +
+ " ex : \n" +
+ " > var splitter = <collection>.getSplitKeysForChunks()\n" +
+ " > splitter() // Execute splits on cluster !\n" )
+
+ return splitFunction
+
+}
+
+
+
+
diff --git a/shell/utils_sh.js b/shell/utils_sh.js
index 5bd449bc61d..297643fd270 100644
--- a/shell/utils_sh.js
+++ b/shell/utils_sh.js
@@ -23,6 +23,22 @@ sh._adminCommand = function( cmd , skipCheck ) {
return res;
}
+
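+// Formats a byte count for display, e.g. sh._dataFormat( 1572864 ) gives "1.5Mb"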
+sh._dataFormat = function( bytes ){
+ if( bytes < 1024 ) return Math.floor( bytes ) + "b"
+ if( bytes < 1024 * 1024 ) return Math.floor( bytes / 1024 ) + "kb"
+ if( bytes < 1024 * 1024 * 1024 ) return Math.floor( ( Math.floor( bytes / 1024 ) / 1024 ) * 100 ) / 100 + "Mb"
+ return Math.floor( ( Math.floor( bytes / ( 1024 * 1024 ) ) / 1024 ) * 100 ) / 100 + "Gb"
+}
+
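+// Anchored regex matching a collection's chunk _ids, e.g. "foo.bar" gives /^foo\.bar-.*/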
+sh._collRE = function( coll ){
+ return RegExp( "^" + (coll + "").replace(/\./g, "\\.") + "-.*" )
+}
+
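+// Pretty-prints a chunk's key range, e.g. [{ "x" : 0 } -> { "x" : 10 }]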
+sh._pchunk = function( chunk ){
+ return "[" + tojson( chunk.min ) + " -> " + tojson( chunk.max ) + "]"
+}
+
sh.help = function() {
print( "\tsh.addShard( host ) server:port OR setname/server:port" )
print( "\tsh.enableSharding(dbname) enables sharding on the database dbname" )