author      Alberto Lerner <alerner@10gen.com>        2013-04-01 14:11:19 -0400
committer   Dan Pasette <dan@10gen.com>                2013-04-02 10:25:32 -0400
commit      3bc93285244eaafe0f5f1d019a8eb74609134239 (patch)
tree        e91815b131edffa6f542e9a9e079259fc65d6fe6
parent      4a472d8df3a07679edfe36339490a50830408843 (diff)
download    mongo-3bc93285244eaafe0f5f1d019a8eb74609134239.tar.gz
SERVER-9125 Copy collections faster in the config upgrade procedure
-rw-r--r--  src/mongo/client/syncclusterconnection.cpp   26
-rw-r--r--  src/mongo/s/config_upgrade_helpers.cpp       34
2 files changed, 57 insertions, 3 deletions
diff --git a/src/mongo/client/syncclusterconnection.cpp b/src/mongo/client/syncclusterconnection.cpp
index b0a7a4e3482..7bb538079eb 100644
--- a/src/mongo/client/syncclusterconnection.cpp
+++ b/src/mongo/client/syncclusterconnection.cpp
@@ -336,7 +336,31 @@ namespace mongo {
             return;
         }
 
-        uassert( 10023 , "SyncClusterConnection bulk insert not implemented" , 0);
+        for (vector<BSONObj>::const_iterator it = v.begin(); it != v.end(); ++it ) {
+            BSONObj obj = *it;
+            if ( obj["_id"].type() == EOO ) {
+                string assertMsg = "SyncClusterConnection::insert (batched) obj misses an _id: ";
+                uasserted( 16743, assertMsg + obj.jsonString() );
+            }
+        }
+
+        // fsync all connections before starting the batch.
+        string errmsg;
+        if ( ! prepare( errmsg ) ) {
+            string assertMsg = "SyncClusterConnection::insert (batched) prepare failed: ";
+            throw UserException( 16744, assertMsg + errmsg );
+        }
+
+        // We still want one getlasterror per document, even if they're batched.
+        for ( size_t i=0; i<_conns.size(); i++ ) {
+            for ( vector<BSONObj>::const_iterator it = v.begin(); it != v.end(); ++it ) {
+                _conns[i]->insert( ns, *it, flags );
+                _conns[i]->getLastErrorDetailed();
+            }
+        }
+
+        // We issue a final getlasterror, but this time with an fsync.
+        _checkLast();
     }
 
     void SyncClusterConnection::remove( const string &ns , Query query, int flags ) {
diff --git a/src/mongo/s/config_upgrade_helpers.cpp b/src/mongo/s/config_upgrade_helpers.cpp
index a589e02b460..9b316fa9835 100644
--- a/src/mongo/s/config_upgrade_helpers.cpp
+++ b/src/mongo/s/config_upgrade_helpers.cpp
@@ -19,6 +19,7 @@
 #include "mongo/client/connpool.h"
 #include "mongo/db/namespacestring.h"
 #include "mongo/s/cluster_client_internal.h"
+#include "mongo/util/timer.h"
 
 namespace mongo {
 
@@ -249,16 +250,45 @@ namespace mongo {
             return e.toStatus("could not create indexes in new collection");
         }
 
-        // Copy data over
+        //
+        // Copy data over in batches. A batch size here is way smaller than the maximum size of
+        // a bsonobj. We want to copy efficiently but we don't need to maximize the object size
+        // here.
+        //
+
+        Timer t;
+        int64_t docCount = 0;
+        const int32_t maxBatchSize = BSONObjMaxUserSize / 2;
         try {
             ScopedDbConnection& conn = *connPtr;
             scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(fromNS, BSONObj())));
 
+            vector<BSONObj> insertBatch;
+            int32_t insertSize = 0;
             while (cursor->more()) {
                 BSONObj next = cursor->nextSafe();
+                ++docCount;
+
+                if (insertSize + next.objsize() > maxBatchSize ) {
+                    conn->insert(toNS, insertBatch);
+                    _checkGLE(conn);
+                    insertBatch.clear();
+                    insertSize = 0;
+                }
+
+                insertBatch.push_back(next);
+                insertSize += next.objsize();
+
+                if (t.seconds() > 10) {
+                    t.reset();
+                    log() << "Copied " << docCount << " documents so far from "
+                          << fromNS << " to " << toNS;
+                }
+            }
 
-            conn->insert(toNS, next);
+            if (!insertBatch.empty()) {
+                conn->insert(toNS, insertBatch);
                 _checkGLE(conn);
             }
         }
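For context on the first hunk: SyncClusterConnection used to reject the vector overload of insert() outright (assertion 10023). The commit implements it by fanning every document out to each node, with a getLastErrorDetailed() per document and one final fsync'd getlasterror via _checkLast(). A minimal sketch of how a caller might exercise the new overload; the helper name insertPair and the sample documents are illustrative assumptions, not part of the commit:

    #include <string>
    #include <vector>

    #include "mongo/client/syncclusterconnection.h"
    #include "mongo/db/jsobj.h"

    using namespace mongo;

    // Hypothetical caller, for illustration only.
    void insertPair(SyncClusterConnection& conn, const std::string& ns) {
        std::vector<BSONObj> batch;

        // Every document must carry an _id: the new code throws
        // assertion 16743 for any document that misses one.
        batch.push_back(BSON("_id" << 1 << "x" << 10));
        batch.push_back(BSON("_id" << 2 << "x" << 20));

        // One call now writes each document to every node in the sync
        // cluster; previously this would have failed with assertion 10023.
        conn.insert(ns, batch, 0 /* flags */);
    }

Error checking still happens per document per node, so the saving comes from running the prepare() fsync and the final _checkLast() once per batch instead of once per document.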
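The copy loop in config_upgrade_helpers.cpp is a size-capped batching pattern: accumulate documents until the next one would push the batch over the cap, flush, and flush once more for the tail. A standalone sketch of just that pattern, under stated assumptions: std::string stands in for BSONObj, flushBatch for conn->insert(toNS, insertBatch) plus _checkGLE(conn), and kMaxBatchSize for BSONObjMaxUserSize / 2 (8 MB, given the 16 MB user-object limit):

    #include <iostream>
    #include <string>
    #include <vector>

    static const int kMaxBatchSize = 8 * 1024 * 1024;  // stand-in for BSONObjMaxUserSize / 2

    // Stand-in for conn->insert(toNS, insertBatch) followed by _checkGLE(conn).
    static void flushBatch(std::vector<std::string>& batch) {
        std::cout << "flushing " << batch.size() << " documents" << std::endl;
        batch.clear();
    }

    static void copyAll(const std::vector<std::string>& docs) {
        std::vector<std::string> batch;
        int batchSize = 0;

        for (std::vector<std::string>::const_iterator it = docs.begin(); it != docs.end(); ++it) {
            // Flush before appending, so an already-populated batch never
            // exceeds the cap. The empty-batch guard is a small addition over
            // the diff, which flushes unconditionally.
            if (!batch.empty() && batchSize + static_cast<int>(it->size()) > kMaxBatchSize) {
                flushBatch(batch);
                batchSize = 0;
            }

            batch.push_back(*it);
            batchSize += static_cast<int>(it->size());
        }

        if (!batch.empty())
            flushBatch(batch);  // tail batch, mirroring the post-loop insert above
    }

Capping each batch at half the maximum BSON object size keeps every wire message comfortably under the server limit while still amortizing the per-message round trip over many documents, which is what the block comment in the diff is getting at.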