diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2015-07-20 15:59:22 -0400 |
---|---|---|
committer | Charlie Swanson <charlie.swanson@mongodb.com> | 2015-07-20 15:59:22 -0400 |
commit | 4f1dda92ed5976ab950e2eaaaffd7bd20296a066 (patch) | |
tree | fada7dc1789ef2e5e037029deb542ea4fc5670e5 | |
parent | 824776f67165d4ee0447c1e855dcd8a79643b73c (diff) | |
download | mongo-4f1dda92ed5976ab950e2eaaaffd7bd20296a066.tar.gz |
Revert "$sample"
This reverts commit 824776f67165d4ee0447c1e855dcd8a79643b73c.
-rw-r--r-- | jstests/aggregation/bugs/server533.js | 32 | ||||
-rw-r--r-- | jstests/aggregation/testshard1.js | 9 | ||||
-rw-r--r-- | src/mongo/db/pipeline/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document.h | 12 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_internal.h | 31 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source.h | 46 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sample.cpp | 117 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.cpp | 43 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_test.cpp | 232 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_value_test.cpp | 105 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.h | 10 |
13 files changed, 35 insertions, 675 deletions
diff --git a/jstests/aggregation/bugs/server533.js b/jstests/aggregation/bugs/server533.js deleted file mode 100644 index 3b8101444e0..00000000000 --- a/jstests/aggregation/bugs/server533.js +++ /dev/null @@ -1,32 +0,0 @@ -// SERVER-533: Aggregation stage to randomly sample documents. - -// For assertErrorCode. -load('jstests/aggregation/extras/utils.js'); - -(function() { - 'use strict'; - - var coll = db.agg_sample; - coll.drop(); - - var nItems = 3; - for (var i = 0; i < nItems; i++) { - assert.writeOK(coll.insert({_id: i})); - } - - [0, 1, nItems, nItems + 1].forEach(function(size) { - var results = coll.aggregate([{$sample: {size: size}}]).toArray(); - assert.eq(results.length, Math.min(size, nItems)); - }); - - // Multiple $sample stages are allowed. - var results = coll.aggregate([{$sample: {size: nItems}}, {$sample: {size: 1}}]).toArray(); - assert.eq(results.length, 1); - - // Invalid options. - assertErrorCode(coll, [{$sample: 'string'}], 28739); - assertErrorCode(coll, [{$sample: {size: 'string'}}], 28740); - assertErrorCode(coll, [{$sample: {size: -1}}], 28741); - assertErrorCode(coll, [{$sample: {unknownOpt: true}}], 28742); - assertErrorCode(coll, [{$sample: {/* no size */}}], 28743); -}()); diff --git a/jstests/aggregation/testshard1.js b/jstests/aggregation/testshard1.js index ab49ad5653a..71271ca0af5 100644 --- a/jstests/aggregation/testshard1.js +++ b/jstests/aggregation/testshard1.js @@ -226,15 +226,6 @@ function testAvgStdDev() { } testAvgStdDev(); -function testSample() { - jsTestLog('testing $sample'); - [0, 1, 10, nItems, nItems + 1].forEach(function(size) { - var res = db.ts1.aggregate([{$sample: {size: size}}]).toArray(); - assert.eq(res.length, Math.min(nItems, size)); - }); -} -testSample(); - jsTestLog('test $out by copying source collection verbatim to output'); var outCollection = db.ts1_out; var res = aggregateOrdered(db.ts1, [{$out: outCollection.getName()}]); diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index f84f7a20a40..20780b2ded4 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -106,7 +106,6 @@ docSourceEnv.Library( 'document_source_out.cpp', 'document_source_project.cpp', 'document_source_redact.cpp', - 'document_source_sample.cpp', 'document_source_skip.cpp', 'document_source_sort.cpp', 'document_source_unwind.cpp', diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp index a0dd7dc8287..49efa640277 100644 --- a/src/mongo/db/pipeline/document.cpp +++ b/src/mongo/db/pipeline/document.cpp @@ -193,9 +193,8 @@ intrusive_ptr<DocumentStorage> DocumentStorage::clone() const { out->_usedBytes = _usedBytes; out->_numFields = _numFields; out->_hashTabMask = _hashTabMask; - out->_metaFields = _metaFields; + out->_hasTextScore = _hasTextScore; out->_textScore = _textScore; - out->_randVal = _randVal; // Tell values that they have been memcpyed (updates ref counts) for (DocumentStorageIterator it = out->iteratorAll(); !it.atEnd(); it.advance()) { @@ -245,15 +244,12 @@ BSONObj Document::toBson() const { } const StringData Document::metaFieldTextScore("$textScore", StringData::LiteralTag()); -const StringData Document::metaFieldRandVal("$randVal", StringData::LiteralTag()); BSONObj Document::toBsonWithMetaData() const { BSONObjBuilder bb; toBson(&bb); if (hasTextScore()) bb.append(metaFieldTextScore, getTextScore()); - if (hasRandMetaField()) - bb.append(metaFieldRandVal, static_cast<long long>(getRandMetaField())); return bb.obj(); } @@ -263,19 +259,15 @@ Document Document::fromBsonWithMetaData(const BSONObj& bson) { BSONObjIterator it(bson); while (it.more()) { BSONElement elem(it.next()); - auto fieldName = elem.fieldNameStringData(); - if (fieldName[0] == '$') { - if (fieldName == metaFieldTextScore) { + if (elem.fieldName()[0] == '$') { + if (elem.fieldNameStringData() == metaFieldTextScore) { md.setTextScore(elem.Double()); continue; - } else if (fieldName == metaFieldRandVal) { - md.setRandMetaField(static_cast<int64_t>(elem.numberLong())); - continue; } } // Note: this will not parse out metadata in embedded documents. - md.addField(fieldName, Value(elem)); + md.addField(elem.fieldNameStringData(), Value(elem)); } return md.freeze(); @@ -433,14 +425,11 @@ void Document::serializeForSorter(BufBuilder& buf) const { } if (hasTextScore()) { - buf.appendNum(char(DocumentStorage::MetaType::TEXT_SCORE + 1)); + buf.appendNum(char(1)); buf.appendNum(getTextScore()); + } else { + buf.appendNum(char(0)); } - if (hasRandMetaField()) { - buf.appendNum(char(DocumentStorage::MetaType::RAND_VAL + 1)); - buf.appendNum(static_cast<long long>(getRandMetaField())); - } - buf.appendNum(char(0)); } Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&) { @@ -451,15 +440,8 @@ Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeS doc.addField(name, Value::deserializeForSorter(buf, Value::SorterDeserializeSettings())); } - while (char marker = buf.read<char>()) { - if (marker == char(DocumentStorage::MetaType::TEXT_SCORE) + 1) { - doc.setTextScore(buf.read<double>()); - } else if (marker == char(DocumentStorage::MetaType::RAND_VAL) + 1) { - doc.setRandMetaField(buf.read<int64_t>()); - } else { - uasserted(28744, "Unrecognized marker, unable to deserialize buffer"); - } - } + if (buf.read<char>()) // hasTextScore + doc.setTextScore(buf.read<double>()); return doc.freeze(); } diff --git a/src/mongo/db/pipeline/document.h b/src/mongo/db/pipeline/document.h index c6a100c954b..5010f69b5fa 100644 --- a/src/mongo/db/pipeline/document.h +++ b/src/mongo/db/pipeline/document.h @@ -201,14 +201,6 @@ public: return storage().getTextScore(); } - static const StringData metaFieldRandVal; // "$randVal" - bool hasRandMetaField() const { - return storage().hasRandMetaField(); - } - int64_t getRandMetaField() const { - return storage().getRandMetaField(); - } - /// members for Sorter struct SorterDeserializeSettings {}; // unused void serializeForSorter(BufBuilder& buf) const; @@ -429,10 +421,6 @@ public: storage().setTextScore(score); } - void setRandMetaField(int64_t val) { - storage().setRandMetaField(val); - } - /** Convert to a read-only document and release reference. * * Call this to indicate that you are done with this Document and will diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h index 1c0d1a13566..4ccb61b8f9b 100644 --- a/src/mongo/db/pipeline/document_internal.h +++ b/src/mongo/db/pipeline/document_internal.h @@ -31,7 +31,6 @@ #include <third_party/murmurhash3/MurmurHash3.h> #include <boost/intrusive_ptr.hpp> -#include <bitset> #include "mongo/util/intrusive_counter.h" #include "mongo/db/pipeline/value.h" @@ -189,17 +188,10 @@ public: _usedBytes(0), _numFields(0), _hashTabMask(0), - _metaFields(), + _hasTextScore(false), _textScore(0) {} ~DocumentStorage(); - enum MetaType : char { - TEXT_SCORE, - RAND_VAL, - - NUM_FIELDS - }; - static const DocumentStorage& emptyDoc() { static const char emptyBytes[sizeof(DocumentStorage)] = {0}; return *reinterpret_cast<const DocumentStorage*>(emptyBytes); @@ -278,33 +270,19 @@ public: if (source.hasTextScore()) { setTextScore(source.getTextScore()); } - if (source.hasRandMetaField()) { - setRandMetaField(source.getRandMetaField()); - } } bool hasTextScore() const { - return _metaFields.test(MetaType::TEXT_SCORE); + return _hasTextScore; } double getTextScore() const { return _textScore; } void setTextScore(double score) { - _metaFields.set(MetaType::TEXT_SCORE); + _hasTextScore = true; _textScore = score; } - bool hasRandMetaField() const { - return _metaFields.test(MetaType::RAND_VAL); - } - int64_t getRandMetaField() const { - return _randVal; - } - void setRandMetaField(int64_t val) { - _metaFields.set(MetaType::RAND_VAL); - _randVal = val; - } - private: /// Same as lastElement->next() or firstElement() if empty. const ValueElement* end() const { @@ -383,9 +361,8 @@ private: unsigned _numFields; // this includes removed fields unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often - std::bitset<MetaType::NUM_FIELDS> _metaFields; + bool _hasTextScore; // When adding more metadata fields, this should become a bitvector double _textScore; - int64_t _randVal; // When adding a field, make sure to update clone() method }; } diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index 81171f234e0..a6c4bdbb2d8 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -818,29 +818,6 @@ public: /// returns -1 for no limit long long getLimit() const; - /** - * Loads a document to be sorted. This can be used to sort a stream of documents that are not - * coming from another DocumentSource. Once all documents have been added, the caller must call - * loadingDone() before using getNext() to receive the documents in sorted order. - */ - void loadDocument(const Document& doc); - - /** - * Signals to the sort stage that there will be no more input documents. It is an error to call - * loadDocument() once this method returns. - */ - void loadingDone(); - - /** - * Instructs the sort stage to use the given set of cursors as inputs, to merge documents that - * have already been sorted. - */ - void populateFromCursors(const std::vector<DBClientCursor*>& cursors); - - bool isPopulated() { - return populated; - }; - boost::intrusive_ptr<DocumentSourceLimit> getLimitSrc() const { return limitSrc; } @@ -865,6 +842,7 @@ private: // This is used to merge pre-sorted results from a DocumentSourceMergeCursors. class IteratorFromCursor; + void populateFromCursors(const std::vector<DBClientCursor*>& cursors); /* these two parallel each other */ typedef std::vector<boost::intrusive_ptr<Expression>> SortKey; @@ -895,31 +873,9 @@ private: bool _done; bool _mergingPresorted; - std::unique_ptr<MySorter> _sorter; std::unique_ptr<MySorter::Iterator> _output; }; -class DocumentSourceSample final : public DocumentSource, public SplittableDocumentSource { -public: - boost::optional<Document> getNext() final; - const char* getSourceName() const final; - Value serialize(bool explain = false) const final; - - boost::intrusive_ptr<DocumentSource> getShardSource() final; - boost::intrusive_ptr<DocumentSource> getMergeSource() final; - - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx); - -private: - explicit DocumentSourceSample(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - long long _size; - - // When no storage engine optimizations are available, $sample uses a $sort stage to randomly - // sort the documents. - boost::intrusive_ptr<DocumentSourceSort> _sortStage; -}; - class DocumentSourceLimit final : public DocumentSource, public SplittableDocumentSource { public: // virtuals from DocumentSource diff --git a/src/mongo/db/pipeline/document_source_sample.cpp b/src/mongo/db/pipeline/document_source_sample.cpp deleted file mode 100644 index c97fec0253e..00000000000 --- a/src/mongo/db/pipeline/document_source_sample.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/** - * Copyright (c) 2011 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/platform/basic.h" - -#include "mongo/db/pipeline/document_source.h" - -#include <vector> - -#include "mongo/db/client.h" -#include "mongo/db/pipeline/document.h" -#include "mongo/db/pipeline/expression.h" -#include "mongo/db/pipeline/expression_context.h" -#include "mongo/db/pipeline/value.h" - -namespace mongo { -using boost::intrusive_ptr; - -DocumentSourceSample::DocumentSourceSample(const intrusive_ptr<ExpressionContext>& pExpCtx) - : DocumentSource(pExpCtx), _size(0) {} - -REGISTER_DOCUMENT_SOURCE(sample, DocumentSourceSample::createFromBson); - -const char* DocumentSourceSample::getSourceName() const { - return "$sample"; -} - -boost::optional<Document> DocumentSourceSample::getNext() { - if (_size == 0) - return {}; - - pExpCtx->checkForInterrupt(); - - if (!_sortStage->isPopulated()) { - // Exhaust source stage, add random metadata, and push all into sorter. - PseudoRandom& prng = pExpCtx->opCtx->getClient()->getPrng(); - while (boost::optional<Document> next = pSource->getNext()) { - MutableDocument doc(std::move(*next)); - // Add random metadata field. - doc.setRandMetaField(prng.nextInt64()); - _sortStage->loadDocument(doc.freeze()); - } - _sortStage->loadingDone(); - } - - invariant(_sortStage->isPopulated()); - return _sortStage->getNext(); -} - -Value DocumentSourceSample::serialize(bool explain) const { - return Value(DOC(getSourceName() << DOC("size" << _size))); -} - -namespace { -const BSONObj randSortSpec = BSON("$rand" << BSON("$meta" - << "randVal")); -} // namespace - -intrusive_ptr<DocumentSource> DocumentSourceSample::createFromBson( - BSONElement specElem, const intrusive_ptr<ExpressionContext>& expCtx) { - uassert(28739, "the $sample stage specification must be an object", specElem.type() == Object); - intrusive_ptr<DocumentSourceSample> sample(new DocumentSourceSample(expCtx)); - - bool sizeSpecified = false; - for (auto&& elem : specElem.embeddedObject()) { - auto fieldName = elem.fieldNameStringData(); - - if (fieldName == "size") { - uassert(28740, "size argument to $sample must be a number", elem.isNumber()); - uassert(28741, "size argument to $sample must not be negative", elem.numberLong() >= 0); - sample->_size = elem.numberLong(); - sizeSpecified = true; - } else { - uasserted(28742, str::stream() << "unrecognized option to $sample: " << fieldName); - } - } - uassert(28743, "$sample stage must specify a size", sizeSpecified); - - sample->_sortStage = DocumentSourceSort::create(expCtx, randSortSpec, sample->_size); - - return sample; -} - -intrusive_ptr<DocumentSource> DocumentSourceSample::getShardSource() { - return this; -} - -intrusive_ptr<DocumentSource> DocumentSourceSample::getMergeSource() { - // Just need to merge the pre-sorted documents by their random values. - return DocumentSourceSort::create(pExpCtx, randSortSpec, _size); -} -} // mongo diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index 58fd0a2980b..d1c0a0e5a21 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -45,9 +45,6 @@ using std::make_pair; using std::string; using std::vector; -DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext>& pExpCtx) - : DocumentSource(pExpCtx), populated(false), _mergingPresorted(false) {} - REGISTER_DOCUMENT_SOURCE(sort, DocumentSourceSort::createFromBson); const char* DocumentSourceSort::getSourceName() const { @@ -90,6 +87,9 @@ void DocumentSourceSort::dispose() { pSource->dispose(); } +DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), populated(false), _mergingPresorted(false) {} + long long DocumentSourceSort::getLimit() const { return limitSrc ? limitSrc->getLimit() : -1; } @@ -162,19 +162,14 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( } if (keyField.type() == Object) { - BSONObj metaDoc = keyField.Obj(); // this restriction is due to needing to figure out sort direction uassert(17312, - "$meta is the only expression supported by $sort right now", - metaDoc.firstElement().fieldNameStringData() == "$meta"); - - VariablesIdGenerator idGen; - VariablesParseState vps(&idGen); - pSort->vSortKey.push_back(ExpressionMeta::parse(metaDoc.firstElement(), vps)); + "the only expression supported by $sort right now is {$meta: 'textScore'}", + keyField.Obj() == BSON("$meta" + << "textScore")); - // If sorting by textScore, sort highest scores first. If sorting by randVal, order - // doesn't matter, so just always use descending. - pSort->vAscending.push_back(false); + pSort->vSortKey.push_back(new ExpressionMeta()); + pSort->vAscending.push_back(false); // best scoring documents first continue; } @@ -228,27 +223,12 @@ void DocumentSourceSort::populate() { msgasserted(17196, "can only mergePresorted from MergeCursors"); } } else { + unique_ptr<MySorter> sorter(MySorter::make(makeSortOptions(), Comparator(*this))); while (boost::optional<Document> next = pSource->getNext()) { - loadDocument(std::move(*next)); + sorter->add(extractKey(*next), *next); } - loadingDone(); + _output.reset(sorter->done()); } -} - -void DocumentSourceSort::loadDocument(const Document& doc) { - invariant(!populated); - if (!_sorter) { - _sorter.reset(MySorter::make(makeSortOptions(), Comparator(*this))); - } - _sorter->add(extractKey(doc), doc); -} - -void DocumentSourceSort::loadingDone() { - if (!_sorter) { - _sorter.reset(MySorter::make(makeSortOptions(), Comparator(*this))); - } - _output.reset(_sorter->done()); - _sorter.reset(); populated = true; } @@ -277,7 +257,6 @@ void DocumentSourceSort::populateFromCursors(const vector<DBClientCursor*>& curs } _output.reset(MySorter::Iterator::merge(iterators, makeSortOptions(), Comparator(*this))); - populated = true; } Value DocumentSourceSort::extractKey(const Document& d) const { diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp index a82c31888ec..b4060023c16 100644 --- a/src/mongo/db/pipeline/document_source_test.cpp +++ b/src/mongo/db/pipeline/document_source_test.cpp @@ -32,9 +32,6 @@ #include "mongo/db/pipeline/dependencies.h" #include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/expression_context.h" -#include "mongo/db/service_context.h" -#include "mongo/db/service_context_noop.h" -#include "mongo/db/storage_options.h" #include "mongo/dbtests/dbtests.h" #include "mongo/unittest/temp_dir.h" @@ -154,20 +151,13 @@ using mongo::DocumentSourceMock; class Base { public: - Base() - : _service(), - _client(_service.makeClient("DocumentSourceTest")), - _opCtx(_client->makeOperationContext()), - _ctx(new ExpressionContext(_opCtx.get(), NamespaceString(ns))) {} + Base() : _ctx(new ExpressionContext(&_opCtx, NamespaceString(ns))) {} protected: intrusive_ptr<ExpressionContext> ctx() { return _ctx; } - - ServiceContextNoop _service; - ServiceContext::UniqueClient _client; - ServiceContext::UniqueOperationContext _opCtx; + OperationContextNoop _opCtx; private: intrusive_ptr<ExpressionContext> _ctx; @@ -296,7 +286,7 @@ protected: BSONElement specElement = namedSpec.firstElement(); intrusive_ptr<ExpressionContext> expressionContext = - new ExpressionContext(_opCtx.get(), NamespaceString(ns)); + new ExpressionContext(&_opCtx, NamespaceString(ns)); expressionContext->inShard = inShard; // Won't spill to disk properly if it needs to. expressionContext->tempDir = _tempDir.path(); @@ -1027,173 +1017,6 @@ public: } // namespace DocumentSourceProject -namespace DocumentSourceSample { - -using mongo::DocumentSourceSample; -using mongo::DocumentSourceMock; - -class SampleBasics : public Mock::Base, public unittest::Test { -public: - SampleBasics() : _mock(DocumentSourceMock::create()) {} - -protected: - void createSample(long long size) { - BSONObj spec = BSON("$sample" << BSON("size" << size)); - BSONElement specElement = spec.firstElement(); - _sample = DocumentSourceSample::createFromBson(specElement, ctx()); - sample()->setSource(_mock.get()); - checkBsonRepresentation(spec); - } - - DocumentSourceSample* sample() { - return static_cast<DocumentSourceSample*>(_sample.get()); - } - - DocumentSourceMock* source() { - return _mock.get(); - } - - /** - * Makes some general assertions about the results of a $sample stage. - * - * Creates a $sample stage with the given size, advances it 'nExpectedResults' times, asserting - * the results come back in sorted order according to their assigned random values, then asserts - * the stage is exhausted. - */ - void checkResults(long long size, long long nExpectedResults) { - createSample(size); - - boost::optional<Document> prevDoc; - for (long long i = 0; i < nExpectedResults; i++) { - auto thisDoc = sample()->getNext(); - ASSERT_TRUE(bool(thisDoc)); - ASSERT_TRUE(thisDoc->hasRandMetaField()); - if (prevDoc) { - ASSERT_LTE(thisDoc->getRandMetaField(), prevDoc->getRandMetaField()); - } - prevDoc = std::move(thisDoc); - } - assertExhausted(); - } - - /** - * Helper to load 'nDocs' documents into the source stage. - */ - void loadDocuments(int nDocs) { - for (int i = 0; i < nDocs; i++) { - _mock->queue.push_back(DOC("_id" << i)); - } - } - - /** - * Assert that iterator state accessors consistently report the source is exhausted. - */ - void assertExhausted() const { - ASSERT(!_sample->getNext()); - ASSERT(!_sample->getNext()); - ASSERT(!_sample->getNext()); - } - -private: - /** - * Check that the BSON representation generated by the souce matches the BSON it was - * created with. - */ - void checkBsonRepresentation(const BSONObj& spec) { - Value serialized = sample()->serialize(false); - auto generatedSpec = serialized.getDocument().toBson(); - ASSERT_EQUALS(spec, generatedSpec); - } - - intrusive_ptr<DocumentSource> _sample; - intrusive_ptr<DocumentSourceMock> _mock; -}; - -/** - * A sample of size 0 should return 0 results. - */ -TEST_F(SampleBasics, ZeroSize) { - loadDocuments(2); - checkResults(0, 0); -} - -/** - * If the source stage is exhausted, the $sample stage should also be exhausted. - */ -TEST_F(SampleBasics, SourceExhaustedBeforeSample) { - loadDocuments(5); - checkResults(10, 5); -} - -/** - * A $sample stage should limit the number of results to the given size. - */ -TEST_F(SampleBasics, SampleExhaustedBeforeSource) { - loadDocuments(10); - checkResults(5, 5); -} - -/** - * The incoming documents should not be modified by a $sample stage (except their metadata). - */ -TEST_F(SampleBasics, DocsUnmodified) { - createSample(1); - source()->queue.push_back(DOC("a" << 1 << "b" << DOC("c" << 2))); - auto next = sample()->getNext(); - ASSERT_TRUE(bool(next)); - Document doc = *next; - ASSERT_EQUALS(1, doc["a"].getInt()); - ASSERT_EQUALS(2, doc["b"]["c"].getInt()); - ASSERT_TRUE(doc.hasRandMetaField()); - assertExhausted(); -} - -/** - * Fixture to test error cases of the $sample stage. - */ -class InvalidSampleSpec : public Mock::Base, public unittest::Test { -public: - intrusive_ptr<DocumentSource> createSample(BSONObj sampleSpec) { - auto specElem = sampleSpec.firstElement(); - return DocumentSourceSample::createFromBson(specElem, ctx()); - } - - BSONObj createSpec(BSONObj spec) { - return BSON("$sample" << spec); - } -}; - -TEST_F(InvalidSampleSpec, NonObject) { - ASSERT_THROWS_CODE(createSample(BSON("$sample" << 1)), UserException, 28739); - ASSERT_THROWS_CODE(createSample(BSON("$sample" - << "string")), - UserException, - 28739); -} - -TEST_F(InvalidSampleSpec, NonNumericSize) { - ASSERT_THROWS_CODE(createSample(createSpec(BSON("size" - << "string"))), - UserException, - 28740); -} - -TEST_F(InvalidSampleSpec, NegativeSize) { - ASSERT_THROWS_CODE(createSample(createSpec(BSON("size" << -1))), UserException, 28741); - ASSERT_THROWS_CODE(createSample(createSpec(BSON("size" << -1.0))), UserException, 28741); -} - -TEST_F(InvalidSampleSpec, ExtraOption) { - ASSERT_THROWS_CODE( - createSample(createSpec(BSON("size" << 1 << "extra" << 2))), UserException, 28742); -} - -TEST_F(InvalidSampleSpec, MissingSize) { - ASSERT_THROWS_CODE(createSample(createSpec(BSONObj())), UserException, 28743); -} - -} // namespace DocumentSourceSample - namespace DocumentSourceSort { using mongo::DocumentSourceSort; @@ -1518,53 +1341,6 @@ class NullValue : public CheckResultsBase { } }; -/** - * Order by text score. - */ -class TextScore : public CheckResultsBase { - std::deque<Document> inputData() { - MutableDocument first; - first["_id"] = Value(0); - first.setTextScore(10); - MutableDocument second; - second["_id"] = Value(1); - second.setTextScore(20); - return {first.freeze(), second.freeze()}; - } - - string expectedResultSetString() { - return "[{_id:1},{_id:0}]"; - } - - BSONObj sortSpec() { - return BSON("$computed0" << metaTextScore); - } -}; - -/** - * Order by random value in metadata. - */ -class RandMeta : public CheckResultsBase { - std::deque<Document> inputData() { - MutableDocument first; - first["_id"] = Value(0); - first.setRandMetaField(10); - MutableDocument second; - second["_id"] = Value(1); - second.setRandMetaField(20); - return {first.freeze(), second.freeze()}; - } - - string expectedResultSetString() { - return "[{_id:1},{_id:0}]"; - } - - BSONObj sortSpec() { - return BSON("$computed0" << BSON("$meta" - << "randVal")); - } -}; - /** A missing nested object within an array returns an empty array. */ class MissingObjectWithinArray : public CheckResultsBase { std::deque<Document> inputData() { @@ -2132,8 +1908,6 @@ public: add<DocumentSourceSort::MixedNumericSort>(); add<DocumentSourceSort::MissingValue>(); add<DocumentSourceSort::NullValue>(); - add<DocumentSourceSort::TextScore>(); - add<DocumentSourceSort::RandMeta>(); add<DocumentSourceSort::MissingObjectWithinArray>(); add<DocumentSourceSort::ExtractArrayValues>(); add<DocumentSourceSort::Dependencies>(); diff --git a/src/mongo/db/pipeline/document_value_test.cpp b/src/mongo/db/pipeline/document_value_test.cpp index 6b2269948e6..544ac629a9b 100644 --- a/src/mongo/db/pipeline/document_value_test.cpp +++ b/src/mongo/db/pipeline/document_value_test.cpp @@ -432,111 +432,6 @@ public: }; } // namespace Document -namespace MetaFields { -using mongo::Document; -TEST(MetaFields, TextScoreBasics) { - // Documents should not have a text score until it is set. - ASSERT_FALSE(Document().hasTextScore()); - - // Setting the text score should work as expected. - MutableDocument docBuilder; - docBuilder.setTextScore(1.0); - Document doc = docBuilder.freeze(); - ASSERT_TRUE(doc.hasTextScore()); - ASSERT_EQ(1.0, doc.getTextScore()); -} - -TEST(MetaFields, RandValBasics) { - // Documents should not have a random value until it is set. - ASSERT_FALSE(Document().hasRandMetaField()); - - // Setting the random value field should work as expected. - MutableDocument docBuilder; - docBuilder.setRandMetaField(1); - Document doc = docBuilder.freeze(); - ASSERT_TRUE(doc.hasRandMetaField()); - ASSERT_EQ(1, doc.getRandMetaField()); - - // Setting the random value twice should keep the second value. - MutableDocument docBuilder2; - docBuilder2.setRandMetaField(1); - docBuilder2.setRandMetaField(2); - Document doc2 = docBuilder2.freeze(); - ASSERT_TRUE(doc2.hasRandMetaField()); - ASSERT_EQ(2, doc2.getRandMetaField()); -} - -class SerializationTest : public unittest::Test { -protected: - Document roundTrip(const Document& input) { - BufBuilder bb; - input.serializeForSorter(bb); - BufReader reader(bb.buf(), bb.len()); - return Document::deserializeForSorter(reader, Document::SorterDeserializeSettings()); - } - - void assertRoundTrips(const Document& input) { - // Round trip to/from a buffer. - auto output = roundTrip(input); - ASSERT_EQ(output, input); - ASSERT_EQ(output.hasTextScore(), input.hasTextScore()); - ASSERT_EQ(output.hasRandMetaField(), input.hasRandMetaField()); - if (input.hasTextScore()) - ASSERT_EQ(output.getTextScore(), input.getTextScore()); - if (input.hasRandMetaField()) - ASSERT_EQ(output.getRandMetaField(), input.getRandMetaField()); - - ASSERT(output.toBson().binaryEqual(input.toBson())); - } -}; - -TEST_F(SerializationTest, MetaSerializationNoVals) { - MutableDocument docBuilder; - docBuilder.setTextScore(10.0); - docBuilder.setRandMetaField(20); - assertRoundTrips(docBuilder.freeze()); -} - -TEST_F(SerializationTest, MetaSerializationWithVals) { - // Same as above test, but add a non-meta field as well. - MutableDocument docBuilder(DOC("foo" << 10)); - docBuilder.setTextScore(10.0); - docBuilder.setRandMetaField(20); - assertRoundTrips(docBuilder.freeze()); -} - -TEST(MetaFields, ToAndFromBson) { - MutableDocument docBuilder; - docBuilder.setTextScore(10.0); - docBuilder.setRandMetaField(20); - Document doc = docBuilder.freeze(); - BSONObj obj = doc.toBsonWithMetaData(); - ASSERT_EQ(10.0, obj[Document::metaFieldTextScore].Double()); - ASSERT_EQ(20, obj[Document::metaFieldRandVal].numberLong()); - Document fromBson = Document::fromBsonWithMetaData(obj); - ASSERT_TRUE(fromBson.hasTextScore()); - ASSERT_TRUE(fromBson.hasRandMetaField()); - ASSERT_EQ(10.0, fromBson.getTextScore()); - ASSERT_EQ(20, fromBson.getRandMetaField()); -} - -TEST(MetaFields, BadSerialization) { - // Write an unrecognized option to the buffer. - BufBuilder bb; - // Signal there are 0 fields. - bb.appendNum(0); - // This would specify a meta field with an invalid type. - bb.appendNum(char(DocumentStorage::MetaType::NUM_FIELDS) + 1); - // Signals end of input. - bb.appendNum(char(0)); - BufReader reader(bb.buf(), bb.len()); - ASSERT_THROWS_CODE( - Document::deserializeForSorter(reader, Document::SorterDeserializeSettings()), - UserException, - 28744); -} -} // namespace MetaFields - namespace Value { using mongo::Value; diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index 88a471d0792..92aab58ad61 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -1851,45 +1851,23 @@ REGISTER_EXPRESSION(meta, ExpressionMeta::parse); intrusive_ptr<Expression> ExpressionMeta::parse(BSONElement expr, const VariablesParseState& vpsIn) { uassert(17307, "$meta only supports String arguments", expr.type() == String); - if (expr.valueStringData() == "textScore") { - return new ExpressionMeta(MetaType::TEXT_SCORE); - } else if (expr.valueStringData() == "randVal") { - return new ExpressionMeta(MetaType::RAND_VAL); - } else { - uasserted(17308, "Unsupported argument to $meta: " + expr.String()); - } -} + uassert(17308, "Unsupported argument to $meta: " + expr.String(), expr.String() == "textScore"); -ExpressionMeta::ExpressionMeta(MetaType metaType) : _metaType(metaType) {} + return new ExpressionMeta(); +} Value ExpressionMeta::serialize(bool explain) const { - switch (_metaType) { - case MetaType::TEXT_SCORE: - return Value(DOC("$meta" - << "textScore")); - case MetaType::RAND_VAL: - return Value(DOC("$meta" - << "randVal")); - } - MONGO_UNREACHABLE; + return Value(DOC("$meta" + << "textScore")); } Value ExpressionMeta::evaluateInternal(Variables* vars) const { const Document& root = vars->getRoot(); - switch (_metaType) { - case MetaType::TEXT_SCORE: - return root.hasTextScore() ? Value(root.getTextScore()) : Value(); - case MetaType::RAND_VAL: - return root.hasRandMetaField() ? Value(static_cast<long long>(root.getRandMetaField())) - : Value(); - } - MONGO_UNREACHABLE; + return root.hasTextScore() ? Value(root.getTextScore()) : Value(); } void ExpressionMeta::addDependencies(DepsTracker* deps, vector<string>* path) const { - if (_metaType == MetaType::TEXT_SCORE) { - deps->needTextScore = true; - } + deps->needTextScore = true; } /* ------------------------- ExpressionMillisecond ----------------------------- */ diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index b4caa27ae52..c386cbe80b3 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -795,16 +795,6 @@ public: void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); - -private: - enum MetaType { - TEXT_SCORE, - RAND_VAL, - }; - - ExpressionMeta(MetaType metaType); - - MetaType _metaType; }; class ExpressionMillisecond final : public ExpressionFixedArity<ExpressionMillisecond, 1> { |