author     gregs <greg@10gen.com>  2011-09-14 16:23:56 -0400
committer  gregs <greg@10gen.com>  2011-11-10 11:27:32 -0500
commit     d42cf0df2b760e7a5ff04d39312c44715d580187 (patch)
tree       d5aeec5e08038fae8525210c63a7001b47cb086e
parent     6cea95fbf5f2bff8809ee9a001ca680460d86a0f (diff)
download   mongo-d42cf0df2b760e7a5ff04d39312c44715d580187.tar.gz
shell helpers for some sharding operations/status across a collection
-rw-r--r--  shell/collection.js | 189
-rw-r--r--  shell/utils_sh.js   |  16
2 files changed, 205 insertions, 0 deletions
diff --git a/shell/collection.js b/shell/collection.js
index 1e6fe032387..484e435fc8d 100644
--- a/shell/collection.js
+++ b/shell/collection.js
@@ -62,6 +62,8 @@ DBCollection.prototype.help = function () {
print("\tdb." + shortName + ".update(query, object[, upsert_bool, multi_bool])");
print("\tdb." + shortName + ".validate( <full> ) - SLOW");;
print("\tdb." + shortName + ".getShardVersion() - only for use with sharding");
+ print("\tdb." + shortName + ".getShardDistribution() - prints statistics about data distribution in the cluster");
+ print("\tdb." + shortName + ".getSplitKeysForChunks( <maxChunkSize> ) - calculates split points over all chunks and returns splitter function");
return __magicNoPrint;
}
@@ -654,3 +656,190 @@ DBCollection.autocomplete = function(obj){
}
return ret;
}
+
+
+// Sharding additions
+
+/*
+Usage :
+
+mongo <mongos>
+> load('path-to-file/shardingAdditions.js')
+Loading custom sharding extensions...
+true
+
+> var collection = db.getMongo().getCollection("foo.bar")
+> collection.getShardDistribution() // prints statistics related to the collection's data distribution
+
+> collection.getSplitKeysForChunks() // generates split points for all chunks in the collection, based on the
+                                     // default maxChunkSize or, alternatively, a specified chunk size
+> collection.getSplitKeysForChunks( 10 ) // chunk size in MB
+
+> var splitter = collection.getSplitKeysForChunks() // by default the chunks are not split ; the keys are just
+                                                    // found. A splitter function is returned which will actually
+                                                    // perform the splits.
+
+> splitter() // ! Actually executes the splits on the cluster !
+
+*/
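+
+/*
+Illustrative getShardDistribution() output on a hypothetical 2-shard cluster
+( shard names, hosts and all figures below are made up ; the format follows
+the print statements in the function ) :
+
+Shard shard0000 at localhost:30000
+  data : 117.15Mb docs : 245396 chunks : 5
+  estimated data per chunk : 23.43Mb
+  estimated docs per chunk : 49079
+
+Totals
+  data : 234.3Mb docs : 490792 chunks : 10
+  Shard shard0000 data : 50%, docs : 50%, avg obj size : 500b
+  Shard shard0001 data : 50%, docs : 50%, avg obj size : 500b
+*/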
+
+DBCollection.prototype.getShardDistribution = function(){
+
+ var stats = this.stats()
+
+ if( ! stats.sharded ){
+ print( "Collection " + this + " is not sharded." )
+ return
+ }
+
+ var config = this.getMongo().getDB("config")
+
+ var numChunks = 0
+
+ for( var shard in stats.shards ){
+
+ var shardDoc = config.shards.findOne({ _id : shard })
+
+ print( "\nShard " + shard + " at " + shardDoc.host )
+
+ var shardStats = stats.shards[ shard ]
+
+        var chunks = config.chunks.find({ _id : sh._collRE( this ), shard : shard }).toArray()
+
+ numChunks += chunks.length
+
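+        // Note : per-chunk figures are simple averages ( shard totals divided
+        // by the shard's chunk count ), not measured per-chunk sizes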
+ var estChunkData = shardStats.size / chunks.length
+ var estChunkCount = Math.floor( shardStats.count / chunks.length )
+
+ print( " data : " + sh._dataFormat( shardStats.size ) +
+ " docs : " + shardStats.count +
+ " chunks : " + chunks.length )
+ print( " estimated data per chunk : " + sh._dataFormat( estChunkData ) )
+ print( " estimated docs per chunk : " + estChunkCount )
+
+ }
+
+ print( "\nTotals" )
+ print( " data : " + sh._dataFormat( stats.size ) +
+ " docs : " + stats.count +
+ " chunks : " + numChunks )
+ for( var shard in stats.shards ){
+
+ var shardStats = stats.shards[ shard ]
+
+        var estDataPercent = Math.floor( shardStats.size / stats.size * 10000 ) / 100
+        var estDocPercent = Math.floor( shardStats.count / stats.count * 10000 ) / 100
+
+ print( " Shard " + shard + " data : " + estDataPercent + "%, docs : " + estDocPercent + "%" +
+ ", avg obj size : " + sh._dataFormat( stats.shards[ shard ].avgObjSize ) )
+ }
+
+ print( "\n" )
+
+}
+
+
+DBCollection.prototype.getSplitKeysForChunks = function( chunkSize ){
+
+ var stats = this.stats()
+
+ if( ! stats.sharded ){
+ print( "Collection " + this + " is not sharded." )
+ return
+ }
+
+ var config = this.getMongo().getDB("config")
+
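+    // No chunk size passed in : fall back to the cluster-wide default stored
+    // in config.settings ( value is in MB )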
+ if( ! chunkSize ){
+ chunkSize = config.settings.findOne({ _id : "chunksize" }).value
+ print( "Chunk size not set, using default of " + chunkSize + "Mb" )
+ }
+ else{
+ print( "Using chunk size of " + chunkSize + "Mb" )
+ }
+
+ var shardDocs = config.shards.find().toArray()
+
+ var allSplitPoints = {}
+ var numSplits = 0
+
+ for( var i = 0; i < shardDocs.length; i++ ){
+
+ var shardDoc = shardDocs[i]
+ var shard = shardDoc._id
+ var host = shardDoc.host
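+        // Connect directly to the shard, since splitVector is run against the
+        // shard that actually holds the chunk data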
+ var sconn = new Mongo( host )
+
+ var chunks = config.chunks.find({ _id : sh._collRE( this ), shard : shard }).toArray()
+
+ print( "\nGetting split points for chunks on shard " + shard + " at " + host )
+
+ var splitPoints = []
+
+ for( var j = 0; j < chunks.length; j++ ){
+ var chunk = chunks[j]
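+            // splitVector walks the shard key index and returns the keys that
+            // would split this range into pieces of at most maxChunkSize MB,
+            // without actually splitting anything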
+ var result = sconn.getDB("admin").runCommand({ splitVector : this + "", min : chunk.min, max : chunk.max, maxChunkSize : chunkSize })
+ if( ! result.ok ){
+ print( " Had trouble getting split keys for chunk " + sh._pchunk( chunk ) + " :\n" )
+ printjson( result )
+ }
+ else{
+ splitPoints = splitPoints.concat( result.splitKeys )
+
+ if( result.splitKeys.length > 0 )
+ print( " Added " + result.splitKeys.length + " split points for chunk " + sh._pchunk( chunk ) )
+ }
+ }
+
+ print( "Total splits for shard " + shard + " : " + splitPoints.length )
+
+ numSplits += splitPoints.length
+ allSplitPoints[ shard ] = splitPoints
+
+ }
+
+ // Get most recent migration
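+    // ( moveChunk operations appear in config.changelog with what : "moveChunk.*",
+    //   so this shows whether the balancer has been active recently )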
+ var migration = config.changelog.find({ what : /^move.*/ }).sort({ time : -1 }).limit( 1 ).toArray()
+ if( migration.length == 0 )
+ print( "\nNo migrations found in changelog." )
+ else {
+ migration = migration[0]
+ print( "\nMost recent migration activity was on " + migration.ns + " at " + migration.time )
+ }
+
+ var admin = this.getMongo().getDB("admin")
+ var coll = this
+ var splitFunction = function(){
+
+ // Turn off the balancer, just to be safe
+ print( "Turning off balancer..." )
+ config.settings.update({ _id : "balancer" }, { $set : { stopped : true } }, true )
+ print( "Sleeping for 30s to allow balancers to detect change. To be extra safe, check config.changelog" +
+ " for recent migrations." )
+ sleep( 30000 )
+
+        for( var shard in allSplitPoints ){
+ for( var i = 0; i < allSplitPoints[ shard ].length; i++ ){
+ var splitKey = allSplitPoints[ shard ][i]
+ print( "Splitting at " + tojson( splitKey ) )
+ printjson( admin.runCommand({ split : coll + "", middle : splitKey }) )
+ }
+ }
+
+ print( "Turning the balancer back on." )
+ config.settings.update({ _id : "balancer" }, { $set : { stopped : false } } )
+ sleep( 1 )
+ }
+
+ print( "\nGenerated " + numSplits + " split keys, run output function to perform splits.\n" +
+ " ex : \n" +
+ " > var splitter = <collection>.getSplitKeysForChunks()\n" +
+ " > splitter() // Execute splits on cluster !\n" )
+
+ return splitFunction
+
+}
+
+
+
+
diff --git a/shell/utils_sh.js b/shell/utils_sh.js
index 5bd449bc61d..297643fd270 100644
--- a/shell/utils_sh.js
+++ b/shell/utils_sh.js
@@ -23,6 +23,22 @@ sh._adminCommand = function( cmd , skipCheck ) {
return res;
}
+
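+// Formats a byte count for display, e.g. sh._dataFormat( 1572864 ) gives "1.5Mb"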
+sh._dataFormat = function( bytes ){
+ if( bytes < 1024 ) return Math.floor( bytes ) + "b"
+ if( bytes < 1024 * 1024 ) return Math.floor( bytes / 1024 ) + "kb"
+ if( bytes < 1024 * 1024 * 1024 ) return Math.floor( ( Math.floor( bytes / 1024 ) / 1024 ) * 100 ) / 100 + "Mb"
+ return Math.floor( ( Math.floor( bytes / ( 1024 * 1024 ) ) / 1024 ) * 100 ) / 100 + "Gb"
+}
+
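+// Anchored regex matching a collection's chunk _ids, e.g. "foo.bar" gives /^foo\.bar-.*/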
+sh._collRE = function( coll ){
+ return RegExp( "^" + (coll + "").replace(/\./g, "\\.") + "-.*" )
+}
+
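+// Pretty-prints a chunk's key range, e.g. [{ "x" : 0 } -> { "x" : 10 }]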
+sh._pchunk = function( chunk ){
+ return "[" + tojson( chunk.min ) + " -> " + tojson( chunk.max ) + "]"
+}
+
sh.help = function() {
print( "\tsh.addShard( host ) server:port OR setname/server:port" )
print( "\tsh.enableSharding(dbname) enables sharding on the database dbname" )