diff options
5 files changed, 158 insertions, 40 deletions
diff --git a/jstests/aggregation/sources/out/mode_insert_documents.js b/jstests/aggregation/sources/out/mode_insert_documents.js index 0490831cb02..9cbe3dd8c93 100644 --- a/jstests/aggregation/sources/out/mode_insert_documents.js +++ b/jstests/aggregation/sources/out/mode_insert_documents.js @@ -65,19 +65,51 @@ assertErrorCode(coll, pipeline, ErrorCodes.DuplicateKey); // + // Test that an $out aggregation succeeds even if the _id is stripped out and the "uniqueKey" + // is the document key, which will be _id for a new collection. + // + coll.drop(); + assert.commandWorked(coll.insert({a: 0})); + targetColl.drop(); + assert.doesNotThrow(() => coll.aggregate([ + {$project: {_id: 0}}, + {$out: {to: targetColl.getName(), mode: "insertDocuments"}}, + ])); + assert.eq(1, targetColl.find().itcount()); + + // + // Test that an $out aggregation succeeds even if the _id is stripped out and _id is part of a + // multi-field "uniqueKey". + // + coll.drop(); + assert.commandWorked(coll.insert([{_id: "should be projected away", name: "kyle"}])); + targetColl.drop(); + assert.commandWorked(targetColl.createIndex({_id: 1, name: -1}, {unique: true})); + assert.doesNotThrow(() => coll.aggregate([ + {$project: {_id: 0}}, + {$out: {to: targetColl.getName(), mode: "insertDocuments", uniqueKey: {_id: 1, name: 1}}}, + ])); + assert.eq(1, targetColl.find().itcount()); + + // // Tests for $out to a database that differs from the aggregation database. // const foreignDb = db.getSiblingDB("mode_insert_documents_foreign"); const foreignTargetColl = foreignDb.mode_insert_documents_out; - const pipelineDifferentOutputDb = [{ - $out: { - to: foreignTargetColl.getName(), - db: foreignDb.getName(), - mode: "insertDocuments", + const pipelineDifferentOutputDb = [ + {$project: {_id: 0}}, + { + $out: { + to: foreignTargetColl.getName(), + db: foreignDb.getName(), + mode: "insertDocuments", + } } - }]; + ]; foreignDb.dropDatabase(); + coll.drop(); + assert.commandWorked(coll.insert({a: 1})); if (!FixtureHelpers.isMongos(db)) { // @@ -85,20 +117,20 @@ // doesn't exist. // coll.aggregate(pipelineDifferentOutputDb); - assert.eq(foreignTargetColl.find().itcount(), 2); - - // - // First, replace the contents of the collection with new documents that have different _id - // values. Then, re-run the same aggregation, which should merge the new documents into the - // existing output collection without overwriting the existing, non-conflicting documents. - // - coll.drop(); - assert.commandWorked(coll.insert([{_id: 2, a: 2}, {_id: 3, a: 3}])); - coll.aggregate(pipelineDifferentOutputDb); - assert.eq(foreignTargetColl.find().itcount(), 4); + assert.eq(foreignTargetColl.find().itcount(), 1); } else { // Implicit database creation is prohibited in a cluster. - let error = assert.throws(() => coll.aggregate(pipelineDifferentOutputDb)); + const error = assert.throws(() => coll.aggregate(pipelineDifferentOutputDb)); assert.commandFailedWithCode(error, ErrorCodes.NamespaceNotFound); + + // Explicitly create the collection and database, then fall through to the test below. + assert.commandWorked(foreignTargetColl.insert({val: "forcing database creation"})); } + + // + // Re-run the $out aggregation, which should merge with the existing contents of the + // collection. We rely on implicit _id generation to give us unique _id values. + // + assert.doesNotThrow(() => coll.aggregate(pipelineDifferentOutputDb)); + assert.eq(foreignTargetColl.find().itcount(), 2); }()); diff --git a/jstests/aggregation/sources/out/mode_replace_collection.js b/jstests/aggregation/sources/out/mode_replace_collection.js index 6d254229de2..68cd7a9c2b3 100644 --- a/jstests/aggregation/sources/out/mode_replace_collection.js +++ b/jstests/aggregation/sources/out/mode_replace_collection.js @@ -73,6 +73,37 @@ assert.eq(2, targetColl.getIndexes().length); // + // Test that an $out aggregation succeeds even if the _id is stripped out and the "uniqueKey" + // is the document key. + // + coll.drop(); + targetColl.drop(); + assert.commandWorked(coll.insert({val: "will be removed"})); + assert.doesNotThrow(() => coll.aggregate([ + {$replaceRoot: {newRoot: {name: "kyle"}}}, + {$out: {to: targetColl.getName(), mode: "replaceCollection"}} + ])); + assert.eq(1, targetColl.find({name: "kyle", val: {$exists: false}}).itcount()); + + // + // Test that an $out aggregation succeeds even if the _id is stripped out and _id is part of a + // multi-field "uniqueKey". + // + targetColl.drop(); + assert.commandWorked(targetColl.createIndex({name: -1, _id: -1}, {unique: true})); + assert.doesNotThrow(() => coll.aggregate([ + {$replaceRoot: {newRoot: {name: "jungsoo"}}}, + { + $out: { + to: targetColl.getName(), + mode: "replaceCollection", + uniqueKey: {_id: 1, name: 1} + } + } + ])); + assert.eq(1, targetColl.find({val: {$exists: false}}).itcount()); + + // // Tests for $out to a database that differs from the aggregation database. // const foreignDb = db.getSiblingDB("mode_replace_collection_foreign"); diff --git a/jstests/aggregation/sources/out/mode_replace_documents.js b/jstests/aggregation/sources/out/mode_replace_documents.js index fe5f54eb1e0..4281b2b7c08 100644 --- a/jstests/aggregation/sources/out/mode_replace_documents.js +++ b/jstests/aggregation/sources/out/mode_replace_documents.js @@ -39,20 +39,60 @@ ]); assert.eq([{_id: 0, a: {b: 1}, c: 1}], outColl.find().toArray()); - // TODO SERVER-36100: 'replaceDocuments' mode should allow a missing "_id" unique key. - assertErrorCode(coll, - [ - {$project: {_id: 0}}, - { - $out: { - to: outColl.getName(), - mode: "replaceDocuments", - } - } - ], - 50905); + // Test that 'replaceDocuments' mode will automatically generate a missing "_id" uniqueKey. + coll.drop(); + outColl.drop(); + assert.commandWorked(coll.insert({field: "will be removed"})); + assert.doesNotThrow(() => coll.aggregate([ + {$replaceRoot: {newRoot: {}}}, + { + $out: { + to: outColl.getName(), + mode: "replaceDocuments", + } + } + ])); + assert.eq(1, outColl.find({field: {$exists: false}}).itcount()); + + // Test that 'replaceDocuments' mode will automatically generate a missing "_id", and the + // aggregation succeeds with a multi-field uniqueKey. + outColl.drop(); + assert.commandWorked(outColl.createIndex({name: -1, _id: 1}, {unique: true, sparse: true})); + assert.doesNotThrow(() => coll.aggregate([ + {$replaceRoot: {newRoot: {name: "jungsoo"}}}, + { + $out: { + to: outColl.getName(), + mode: "replaceDocuments", + uniqueKey: {_id: 1, name: 1}, + } + } + ])); + assert.eq(1, outColl.find().itcount()); + + // Test that we will not attempt to modify the _id of an existing document if the _id is + // projected away but the uniqueKey does not involve _id. + coll.drop(); + assert.commandWorked(coll.insert({name: "kyle"})); + assert.commandWorked(coll.insert({name: "nick"})); + outColl.drop(); + assert.commandWorked(outColl.createIndex({name: 1}, {unique: true})); + assert.commandWorked(outColl.insert({_id: "must be unchanged", name: "kyle"})); + assert.doesNotThrow(() => coll.aggregate([ + {$project: {_id: 0}}, + {$addFields: {newField: 1}}, + {$out: {to: outColl.getName(), mode: "replaceDocuments", uniqueKey: {name: 1}}} + ])); + const outResult = outColl.find().sort({name: 1}).toArray(); + const errmsgFn = () => tojson(outResult); + assert.eq(2, outResult.length, errmsgFn); + assert.docEq({_id: "must be unchanged", name: "kyle", newField: 1}, outResult[0], errmsgFn); + assert.eq("nick", outResult[1].name, errmsgFn); + assert.eq(1, outResult[1].newField, errmsgFn); + assert.neq(null, outResult[1]._id, errmsgFn); // Test that 'replaceDocuments' mode with a missing non-id unique key fails. + outColl.drop(); assert.commandWorked(outColl.createIndex({missing: 1}, {unique: true})); assertErrorCode( coll, @@ -115,18 +155,21 @@ // doesn't exist. coll.aggregate(pipelineDifferentOutputDb); assert.eq(foreignTargetColl.find().itcount(), 1); - - // Insert a new document into the source collection, then test that running the same - // aggregation will replace existing documents in the foreign output collection when - // applicable. - coll.drop(); - const newDocuments = [{_id: 0, newField: 1}, {_id: 1}]; - assert.commandWorked(coll.insert(newDocuments)); - coll.aggregate(pipelineDifferentOutputDb); - assert.eq(foreignTargetColl.find().sort({_id: 1}).toArray(), newDocuments); } else { // Implicit database creation is prohibited in a cluster. let error = assert.throws(() => coll.aggregate(pipelineDifferentOutputDb)); assert.commandFailedWithCode(error, ErrorCodes.NamespaceNotFound); + + // Force a creation of the database and collection, then fall through the test below. + assert.commandWorked(foreignTargetColl.insert({_id: 0})); } + + // Insert a new document into the source collection, then test that running the same + // aggregation will replace existing documents in the foreign output collection when + // applicable. + coll.drop(); + const newDocuments = [{_id: 0, newField: 1}, {_id: 1}]; + assert.commandWorked(coll.insert(newDocuments)); + coll.aggregate(pipelineDifferentOutputDb); + assert.eq(foreignTargetColl.find().sort({_id: 1}).toArray(), newDocuments); }()); diff --git a/src/mongo/db/pipeline/document_source_out.cpp b/src/mongo/db/pipeline/document_source_out.cpp index 38b0e911734..4a3e4b4469a 100644 --- a/src/mongo/db/pipeline/document_source_out.cpp +++ b/src/mongo/db/pipeline/document_source_out.cpp @@ -127,6 +127,13 @@ DocumentSource::GetNextResult DocumentSourceOut::getNext() { for (; nextInput.isAdvanced(); nextInput = pSource->getNext()) { auto doc = nextInput.releaseDocument(); + // Generate an _id if the uniqueKey includes _id but the document doesn't have one. + if (_uniqueKeyIncludesId && doc.getField("_id"_sd).missing()) { + MutableDocument mutableDoc(std::move(doc)); + mutableDoc["_id"_sd] = Value(OID::gen()); + doc = mutableDoc.freeze(); + } + // Extract the unique key before converting the document to BSON. auto uniqueKey = document_path_support::extractPathsFromDoc(doc, _uniqueKeyFields); auto insertObj = doc.toBson(); @@ -220,7 +227,8 @@ DocumentSourceOut::DocumentSourceOut(NamespaceString outputNs, _done(false), _outputNs(std::move(outputNs)), _mode(mode), - _uniqueKeyFields(std::move(uniqueKey)) {} + _uniqueKeyFields(std::move(uniqueKey)), + _uniqueKeyIncludesId(_uniqueKeyFields.count("_id") == 1) {} intrusive_ptr<DocumentSource> DocumentSourceOut::createFromBson( BSONElement elem, const intrusive_ptr<ExpressionContext>& expCtx) { diff --git a/src/mongo/db/pipeline/document_source_out.h b/src/mongo/db/pipeline/document_source_out.h index 45a631cabd4..853d2b3a9e9 100644 --- a/src/mongo/db/pipeline/document_source_out.h +++ b/src/mongo/db/pipeline/document_source_out.h @@ -190,6 +190,10 @@ private: // with this key pattern (up to order). Default is "_id" for unsharded collections, and "_id" // plus the shard key for sharded collections. std::set<FieldPath> _uniqueKeyFields; + + // True if '_uniqueKeyFields' contains the _id. We store this as a separate boolean to avoid + // repeated lookups into the set. + bool _uniqueKeyIncludesId; }; } // namespace mongo |