author    Alberto Lerner <alerner@10gen.com>  2013-04-01 14:11:19 -0400
committer Dan Pasette <dan@10gen.com>         2013-04-02 10:25:32 -0400
commit    3bc93285244eaafe0f5f1d019a8eb74609134239 (patch)
tree      e91815b131edffa6f542e9a9e079259fc65d6fe6
parent    4a472d8df3a07679edfe36339490a50830408843 (diff)
download  mongo-3bc93285244eaafe0f5f1d019a8eb74609134239.tar.gz
SERVER-9125 Copy collections faster in the config upgrade procedure
-rw-r--r--  src/mongo/client/syncclusterconnection.cpp  26
-rw-r--r--  src/mongo/s/config_upgrade_helpers.cpp      34
2 files changed, 57 insertions, 3 deletions
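
The change has two halves. SyncClusterConnection::insert() for a vector of documents, which previously failed with "bulk insert not implemented", now verifies that every document carries an _id, fsync-prepares all member connections, writes each document to every member with a getlasterror after each write, and ends with a checked, fsync'd getlasterror. The config-upgrade copy routine then feeds that path in batches capped at BSONObjMaxUserSize / 2 bytes instead of inserting one document at a time. Below is a minimal, self-contained sketch of that size-capped batching pattern; Doc, flushBatch, and the byte sizes are hypothetical stand-ins, not MongoDB code.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct Doc {
    std::string payload;
    int32_t objsize() const { return static_cast<int32_t>(payload.size()); }
};

static void flushBatch(std::vector<Doc>& batch, int64_t& copied) {
    // Stand-in for conn->insert(toNS, insertBatch) followed by a getlasterror check.
    copied += static_cast<int64_t>(batch.size());
    batch.clear();
}

int main() {
    const int32_t maxBatchSize = 64;                   // the patch uses BSONObjMaxUserSize / 2
    std::vector<Doc> source(25, Doc{"0123456789"});    // 25 ten-byte "documents"

    std::vector<Doc> batch;
    int32_t batchSize = 0;
    int64_t copied = 0;

    for (const Doc& doc : source) {
        // Flush before the batch would grow past the cap, as in the upgrade helper.
        if (batchSize + doc.objsize() > maxBatchSize) {
            flushBatch(batch, copied);
            batchSize = 0;
        }
        batch.push_back(doc);
        batchSize += doc.objsize();
    }
    if (!batch.empty())
        flushBatch(batch, copied);                     // final partial batch

    std::cout << "copied " << copied << " documents in batches\n";
    return 0;
}
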
diff --git a/src/mongo/client/syncclusterconnection.cpp b/src/mongo/client/syncclusterconnection.cpp
index b0a7a4e3482..7bb538079eb 100644
--- a/src/mongo/client/syncclusterconnection.cpp
+++ b/src/mongo/client/syncclusterconnection.cpp
@@ -336,7 +336,31 @@ namespace mongo {
             return;
         }
-        uassert( 10023 , "SyncClusterConnection bulk insert not implemented" , 0);
+        for (vector<BSONObj>::const_iterator it = v.begin(); it != v.end(); ++it ) {
+            BSONObj obj = *it;
+            if ( obj["_id"].type() == EOO ) {
+                string assertMsg = "SyncClusterConnection::insert (batched) obj misses an _id: ";
+                uasserted( 16743, assertMsg + obj.jsonString() );
+            }
+        }
+
+        // fsync all connections before starting the batch.
+        string errmsg;
+        if ( ! prepare( errmsg ) ) {
+            string assertMsg = "SyncClusterConnection::insert (batched) prepare failed: ";
+            throw UserException( 16744, assertMsg + errmsg );
+        }
+
+        // We still want one getlasterror per document, even if they're batched.
+        for ( size_t i=0; i<_conns.size(); i++ ) {
+            for ( vector<BSONObj>::const_iterator it = v.begin(); it != v.end(); ++it ) {
+                _conns[i]->insert( ns, *it, flags );
+                _conns[i]->getLastErrorDetailed();
+            }
+        }
+
+        // We issue a final getlasterror, but this time with an fsync.
+        _checkLast();
     }

     void SyncClusterConnection::remove( const string &ns , Query query, int flags ) {
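
For the SyncClusterConnection half above, the write protocol is: validate _id on every document up front, prepare (fsync) each member connection, then for each member insert every document followed by a getLastErrorDetailed round trip, and finally run _checkLast(), which issues an fsync'd getlasterror and is the only result actually inspected. A rough sketch of that shape under those assumptions; Member, writeDoc, ackLastWrite, and fsyncAndCheck are hypothetical stand-ins for the real connection calls:

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct Member {
    std::string name;

    void writeDoc(const std::string& doc) {
        // Stand-in for _conns[i]->insert( ns, obj, flags ).
        (void)doc;
    }
    void ackLastWrite() {
        // Stand-in for _conns[i]->getLastErrorDetailed(); the patch issues it
        // per document but does not inspect the result.
    }
    std::string fsyncAndCheck() {
        // Stand-in for the final fsync'd getlasterror done by _checkLast().
        return "";
    }
};

static void batchedInsert(std::vector<Member>& members, const std::vector<std::string>& docs) {
    for (Member& m : members) {
        for (const std::string& doc : docs) {
            m.writeDoc(doc);
            m.ackLastWrite();        // one acknowledgement round trip per document
        }
    }
    // Only this final, fsync'd check is allowed to fail the whole batch.
    for (Member& m : members) {
        std::string err = m.fsyncAndCheck();
        if (!err.empty())
            throw std::runtime_error("sync cluster insert failed on " + m.name + ": " + err);
    }
}

int main() {
    std::vector<Member> members{{"configA"}, {"configB"}, {"configC"}};
    std::vector<std::string> docs{"{ _id: 1 }", "{ _id: 2 }"};

    try {
        batchedInsert(members, docs);
        std::cout << "batch applied to " << members.size() << " members\n";
    } catch (const std::exception& e) {
        std::cout << "aborted: " << e.what() << "\n";
    }
    return 0;
}
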
diff --git a/src/mongo/s/config_upgrade_helpers.cpp b/src/mongo/s/config_upgrade_helpers.cpp
index a589e02b460..9b316fa9835 100644
--- a/src/mongo/s/config_upgrade_helpers.cpp
+++ b/src/mongo/s/config_upgrade_helpers.cpp
@@ -19,6 +19,7 @@
 #include "mongo/client/connpool.h"
 #include "mongo/db/namespacestring.h"
 #include "mongo/s/cluster_client_internal.h"
+#include "mongo/util/timer.h"

 namespace mongo {
@@ -249,16 +250,45 @@ namespace mongo {
             return e.toStatus("could not create indexes in new collection");
         }
-        // Copy data over
+        //
+        // Copy data over in batches. A batch size here is way smaller than the maximum size of
+        // a bsonobj. We want to copy efficiently but we don't need to maximize the object size
+        // here.
+        //
+
+        Timer t;
+        int64_t docCount = 0;
+        const int32_t maxBatchSize = BSONObjMaxUserSize / 2;
         try {
             ScopedDbConnection& conn = *connPtr;
             scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(fromNS, BSONObj())));
+            vector<BSONObj> insertBatch;
+            int32_t insertSize = 0;
             while (cursor->more()) {
                 BSONObj next = cursor->nextSafe();
+                ++docCount;
+
+                if (insertSize + next.objsize() > maxBatchSize ) {
+                    conn->insert(toNS, insertBatch);
+                    _checkGLE(conn);
+                    insertBatch.clear();
+                    insertSize = 0;
+                }
+
+                insertBatch.push_back(next);
+                insertSize += next.objsize();
+
+                if (t.seconds() > 10) {
+                    t.reset();
+                    log() << "Copied " << docCount << " documents so far from "
+                          << fromNS << " to " << toNS;
+                }
+            }
-                conn->insert(toNS, next);
+            if (!insertBatch.empty()) {
+                conn->insert(toNS, insertBatch);
                 _checkGLE(conn);
             }
         }
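
The copy loop above also resets a Timer each time it logs, so a long-running copy reports progress roughly every 10 seconds. A small stand-alone sketch of that reset-on-report pattern using std::chrono; the thresholds and the per-document work are placeholders, shortened so the example runs quickly:

#include <chrono>
#include <cstdint>
#include <iostream>
#include <thread>

int main() {
    using Clock = std::chrono::steady_clock;
    const auto reportEvery = std::chrono::milliseconds(50);         // stands in for the 10-second threshold

    Clock::time_point lastReport = Clock::now();
    int64_t docCount = 0;

    for (int i = 0; i < 200; ++i) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));  // stand-in for copying one document
        ++docCount;

        if (Clock::now() - lastReport > reportEvery) {
            lastReport = Clock::now();                              // reset, like t.reset() in the patch
            std::cout << "Copied " << docCount << " documents so far\n";
        }
    }
    std::cout << "Copied " << docCount << " documents total\n";
    return 0;
}
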