diff options
author | Ian Boros <puppyofkosh@gmail.com> | 2019-05-15 19:05:00 -0400 |
---|---|---|
committer | Ian Boros <puppyofkosh@gmail.com> | 2019-05-20 21:38:07 -0400 |
commit | d7c2ad76b418912c16d4250bdbe74f8a9b3f51ca (patch) | |
tree | 4ff2622d3a38fbf67d73f848e2deaf3c6c5e2cc5 | |
parent | cb7eff01f675d854d7484bd68ce298a8817d2af2 (diff) | |
download | mongo-d7c2ad76b418912c16d4250bdbe74f8a9b3f51ca.tar.gz |
SERVER-40555 add searchSnippet metadata
-rw-r--r-- | jstests/core/find_projection_meta_errors.js | 4 | ||||
-rw-r--r-- | jstests/sharding/aggregates_during_balancing.js | 4 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document.cpp | 43 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document.h | 23 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_internal.h | 24 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_value_test.cpp | 96 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.h | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_test.cpp | 28 |
9 files changed, 225 insertions, 8 deletions
diff --git a/jstests/core/find_projection_meta_errors.js b/jstests/core/find_projection_meta_errors.js index bbf91801ee0..6fd69cb9d04 100644 --- a/jstests/core/find_projection_meta_errors.js +++ b/jstests/core/find_projection_meta_errors.js @@ -14,6 +14,10 @@ ErrorCodes.BadValue); assert.commandFailedWithCode( + db.runCommand({find: coll.getName(), projection: {score: {$meta: "searchHighlights"}}}), + ErrorCodes.BadValue); + + assert.commandFailedWithCode( db.runCommand({find: coll.getName(), projection: {score: {$meta: "some garbage"}}}), ErrorCodes.BadValue); }()); diff --git a/jstests/sharding/aggregates_during_balancing.js b/jstests/sharding/aggregates_during_balancing.js index 149f6031583..003674e1530 100644 --- a/jstests/sharding/aggregates_during_balancing.js +++ b/jstests/sharding/aggregates_during_balancing.js @@ -179,7 +179,9 @@ function testSample() { jsTestLog('testing $sample'); [0, 1, 10, nItems, nItems + 1].forEach(function(size) { - var res = db.ts1.aggregate([{$sample: {size: size}}]).toArray(); + // Run with 'allowDiskUse' set to true because this may exceed the in-memory sort + // limit. 
+ var res = db.ts1.aggregate([{$sample: {size: size}}], {allowDiskUse: true}).toArray(); assert.eq(res.length, Math.min(nItems, size)); }); } diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp index e7d74aadbfe..a4c17d92501 100644 --- a/src/mongo/db/pipeline/document.cpp +++ b/src/mongo/db/pipeline/document.cpp @@ -45,12 +45,14 @@ using std::vector; const DocumentStorage DocumentStorage::kEmptyDoc; -const std::vector<StringData> Document::allMetadataFieldNames = {Document::metaFieldTextScore, - Document::metaFieldRandVal, - Document::metaFieldSortKey, - Document::metaFieldGeoNearDistance, - Document::metaFieldGeoNearPoint, - Document::metaFieldSearchScore}; +const std::vector<StringData> Document::allMetadataFieldNames = { + Document::metaFieldGeoNearDistance, + Document::metaFieldGeoNearPoint, + Document::metaFieldRandVal, + Document::metaFieldSearchHighlights, + Document::metaFieldSearchScore, + Document::metaFieldSortKey, + Document::metaFieldTextScore}; Position DocumentStorage::findField(StringData requested) const { int reqSize = requested.size(); // get size calculation out of the way if needed @@ -226,11 +228,24 @@ intrusive_ptr<DocumentStorage> DocumentStorage::clone() const { out->_geoNearDistance = _geoNearDistance; out->_geoNearPoint = _geoNearPoint.getOwned(); out->_searchScore = _searchScore; + out->_searchHighlights = _searchHighlights; } return out; } +size_t DocumentStorage::getMetadataApproximateSize() const { + size_t size = 0; + size += sizeof(_textScore); + size += sizeof(_randVal); + size += _sortKey.objsize(); + size += sizeof(_geoNearDistance); + size += _geoNearPoint.getApproximateSize(); + size += sizeof(_searchScore); + size += _searchHighlights.getApproximateSize(); + return size; +} + DocumentStorage::~DocumentStorage() { std::unique_ptr<char[]> deleteBufferAtScopeEnd(_buffer); @@ -292,6 +307,7 @@ constexpr StringData Document::metaFieldSortKey; constexpr StringData Document::metaFieldGeoNearDistance; 
constexpr StringData Document::metaFieldGeoNearPoint; constexpr StringData Document::metaFieldSearchScore; +constexpr StringData Document::metaFieldSearchHighlights; BSONObj Document::toBsonWithMetaData() const { BSONObjBuilder bb; @@ -308,6 +324,8 @@ BSONObj Document::toBsonWithMetaData() const { getGeoNearPoint().addToBsonObj(&bb, metaFieldGeoNearPoint); if (hasSearchScore()) bb.append(metaFieldSearchScore, getSearchScore()); + if (hasSearchHighlights()) + getSearchHighlights().addToBsonObj(&bb, metaFieldSearchHighlights); return bb.obj(); } @@ -325,6 +343,9 @@ Document Document::fromBsonWithMetaData(const BSONObj& bson) { } else if (fieldName == metaFieldSearchScore) { md.setSearchScore(elem.Double()); continue; + } else if (fieldName == metaFieldSearchHighlights) { + md.setSearchHighlights(Value(elem)); + continue; } else if (fieldName == metaFieldRandVal) { md.setRandMetaField(elem.Double()); continue; @@ -431,6 +452,9 @@ size_t Document::getApproximateSize() const { size -= sizeof(Value); // already accounted for above } + // The metadata also occupies space in the document storage that's pre-allocated. 
+ size += getMetadataApproximateSize(); + return size; } @@ -526,6 +550,10 @@ void Document::serializeForSorter(BufBuilder& buf) const { buf.appendNum(char(DocumentStorage::MetaType::SEARCH_SCORE + 1)); buf.appendNum(getSearchScore()); } + if (hasSearchHighlights()) { + buf.appendNum(char(DocumentStorage::MetaType::SEARCH_HIGHLIGHTS + 1)); + getSearchHighlights().serializeForSorter(buf); + } buf.appendNum(char(0)); } @@ -547,6 +575,9 @@ Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeS BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings())); } else if (marker == char(DocumentStorage::MetaType::SEARCH_SCORE) + 1) { doc.setSearchScore(buf.read<LittleEndian<double>>()); + } else if (marker == char(DocumentStorage::MetaType::SEARCH_HIGHLIGHTS) + 1) { + doc.setSearchHighlights( + Value::deserializeForSorter(buf, Value::SorterDeserializeSettings())); } else { uasserted(28744, "Unrecognized marker, unable to deserialize buffer"); } diff --git a/src/mongo/db/pipeline/document.h b/src/mongo/db/pipeline/document.h index 6d765a24c65..f465ecce0e2 100644 --- a/src/mongo/db/pipeline/document.h +++ b/src/mongo/db/pipeline/document.h @@ -97,6 +97,7 @@ public: static constexpr StringData metaFieldGeoNearDistance = "$dis"_sd; static constexpr StringData metaFieldGeoNearPoint = "$pt"_sd; static constexpr StringData metaFieldSearchScore = "$searchScore"_sd; + static constexpr StringData metaFieldSearchHighlights = "$searchHighlights"_sd; static const std::vector<StringData> allMetadataFieldNames; @@ -160,10 +161,21 @@ public: /** Get the approximate storage size of the document and sub-values in bytes. * Note: Some memory may be shared with other Documents or between fields within * a single Document so this can overestimate usage. + * + * Note: the value returned by this function includes the size of the metadata associated with + * the document. 
*/ size_t getApproximateSize() const; /** + * Return the approximate amount of space used by metadata. Note that documents may reserve + * space for metadata even if no metadata is used. + */ + size_t getMetadataApproximateSize() const { + return storage().getMetadataApproximateSize(); + } + + /** * Compare two documents. Most callers should prefer using DocumentComparator instead. See * document_comparator.h for details. * @@ -291,6 +303,13 @@ public: return storage().getSearchScore(); } + bool hasSearchHighlights() const { + return storage().hasSearchHighlights(); + } + Value getSearchHighlights() const { + return storage().getSearchHighlights(); + } + /// members for Sorter struct SorterDeserializeSettings {}; // unused void serializeForSorter(BufBuilder& buf) const; @@ -554,6 +573,10 @@ public: storage().setSearchScore(score); } + void setSearchHighlights(Value highlights) { + storage().setSearchHighlights(highlights); + } + /** Convert to a read-only document and release reference. * * Call this to indicate that you are done with this Document and will diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h index 71b28d2aeb8..1d2b1b58951 100644 --- a/src/mongo/db/pipeline/document_internal.h +++ b/src/mongo/db/pipeline/document_internal.h @@ -204,6 +204,7 @@ public: GEONEAR_DIST, GEONEAR_POINT, SEARCH_SCORE, + SEARCH_HIGHLIGHTS, // New fields must be added before the NUM_FIELDS sentinel. NUM_FIELDS @@ -279,6 +280,12 @@ public: } /** + * Compute the space allocated for the metadata fields. Will account for space allocated for + * unused metadata fields as well. + */ + size_t getMetadataApproximateSize() const; + + /** * Copies all metadata from source if it has any. * Note: does not clear metadata from this. 
*/ @@ -301,6 +308,9 @@ public: if (source.hasSearchScore()) { setSearchScore(source.getSearchScore()); } + if (source.hasSearchHighlights()) { + setSearchHighlights(source.getSearchHighlights()); + } } bool hasTextScore() const { @@ -369,6 +379,17 @@ public: _searchScore = score; } + bool hasSearchHighlights() const { + return _metaFields.test(MetaType::SEARCH_HIGHLIGHTS); + } + Value getSearchHighlights() const { + return _searchHighlights; + } + void setSearchHighlights(Value highlights) { + _metaFields.set(MetaType::SEARCH_HIGHLIGHTS); + _searchHighlights = highlights; + } + private: /// Same as lastElement->next() or firstElement() if empty. const ValueElement* end() const { @@ -454,7 +475,8 @@ private: double _geoNearDistance; Value _geoNearPoint; double _searchScore; - // When adding a field, make sure to update clone() method + Value _searchHighlights; + // When adding a field, make sure to update clone() and getMetadataApproximateSize() methods. // Defined in document.cpp static const DocumentStorage kEmptyDoc; diff --git a/src/mongo/db/pipeline/document_value_test.cpp b/src/mongo/db/pipeline/document_value_test.cpp index 438a980c10f..e758dadf2a3 100644 --- a/src/mongo/db/pipeline/document_value_test.cpp +++ b/src/mongo/db/pipeline/document_value_test.cpp @@ -523,6 +523,51 @@ TEST(MetaFields, RandValBasics) { ASSERT_EQ(2.0, doc2.getRandMetaField()); } +TEST(MetaFields, SearchScoreBasic) { + // Documents should not have a search score until it is set. + ASSERT_FALSE(Document().hasSearchScore()); + + // Setting the search score field should work as expected. + MutableDocument docBuilder; + docBuilder.setSearchScore(1.23); + Document doc = docBuilder.freeze(); + ASSERT_TRUE(doc.hasSearchScore()); + ASSERT_EQ(1.23, doc.getSearchScore()); + + // Setting the searchScore twice should keep the second value. 
+ MutableDocument docBuilder2; + docBuilder2.setSearchScore(1.0); + docBuilder2.setSearchScore(2.0); + Document doc2 = docBuilder2.freeze(); + ASSERT_TRUE(doc2.hasSearchScore()); + ASSERT_EQ(2.0, doc2.getSearchScore()); +} + +TEST(MetaFields, SearchHighlightsBasic) { + // Documents should not have search highlights until they are set. + ASSERT_FALSE(Document().hasSearchHighlights()); + + // Setting the search highlights field should work as expected. + MutableDocument docBuilder; + Value highlights = DOC_ARRAY("a"_sd + << "b"_sd); + docBuilder.setSearchHighlights(highlights); + Document doc = docBuilder.freeze(); + ASSERT_TRUE(doc.hasSearchHighlights()); + ASSERT_VALUE_EQ(doc.getSearchHighlights(), highlights); + + // Setting the searchHighlights twice should keep the second value. + MutableDocument docBuilder2; + Value otherHighlights = DOC_ARRAY("snippet1"_sd + << "snippet2"_sd + << "snippet3"_sd); + docBuilder2.setSearchHighlights(highlights); + docBuilder2.setSearchHighlights(otherHighlights); + Document doc2 = docBuilder2.freeze(); + ASSERT_TRUE(doc2.hasSearchHighlights()); + ASSERT_VALUE_EQ(doc2.getSearchHighlights(), otherHighlights); +} + class SerializationTest : public unittest::Test { protected: Document roundTrip(const Document& input) { @@ -538,10 +583,16 @@ protected: ASSERT_DOCUMENT_EQ(output, input); ASSERT_EQ(output.hasTextScore(), input.hasTextScore()); ASSERT_EQ(output.hasRandMetaField(), input.hasRandMetaField()); + ASSERT_EQ(output.hasSearchScore(), input.hasSearchScore()); + ASSERT_EQ(output.hasSearchHighlights(), input.hasSearchHighlights()); if (input.hasTextScore()) ASSERT_EQ(output.getTextScore(), input.getTextScore()); if (input.hasRandMetaField()) ASSERT_EQ(output.getRandMetaField(), input.getRandMetaField()); + if (input.hasSearchScore()) + ASSERT_EQ(output.getSearchScore(), input.getSearchScore()); + if (input.hasSearchHighlights()) + ASSERT_VALUE_EQ(output.getSearchHighlights(), input.getSearchHighlights()); 
ASSERT(output.toBson().binaryEqual(input.toBson())); } @@ -551,6 +602,9 @@ TEST_F(SerializationTest, MetaSerializationNoVals) { MutableDocument docBuilder; docBuilder.setTextScore(10.0); docBuilder.setRandMetaField(20.0); + docBuilder.setSearchScore(30.0); + docBuilder.setSearchHighlights(DOC_ARRAY("abc"_sd + << "def"_sd)); assertRoundTrips(docBuilder.freeze()); } @@ -559,6 +613,19 @@ TEST_F(SerializationTest, MetaSerializationWithVals) { MutableDocument docBuilder(DOC("foo" << 10)); docBuilder.setTextScore(10.0); docBuilder.setRandMetaField(20.0); + docBuilder.setSearchScore(30.0); + docBuilder.setSearchHighlights(DOC_ARRAY("abc"_sd + << "def"_sd)); + assertRoundTrips(docBuilder.freeze()); +} + +TEST_F(SerializationTest, MetaSerializationSearchHighlightsNonArray) { + MutableDocument docBuilder; + docBuilder.setTextScore(10.0); + docBuilder.setRandMetaField(20.0); + docBuilder.setSearchScore(30.0); + // Everything should still round trip even if the searchHighlights metadata isn't an array. 
+ docBuilder.setSearchHighlights(Value(1.23)); assertRoundTrips(docBuilder.freeze()); } @@ -566,10 +633,17 @@ TEST(MetaFields, ToAndFromBson) { MutableDocument docBuilder; docBuilder.setTextScore(10.0); docBuilder.setRandMetaField(20.0); + docBuilder.setSearchScore(30.0); + docBuilder.setSearchHighlights(DOC_ARRAY("abc"_sd + << "def"_sd)); Document doc = docBuilder.freeze(); BSONObj obj = doc.toBsonWithMetaData(); ASSERT_EQ(10.0, obj[Document::metaFieldTextScore].Double()); ASSERT_EQ(20, obj[Document::metaFieldRandVal].numberLong()); + ASSERT_EQ(30.0, obj[Document::metaFieldSearchScore].Double()); + ASSERT_BSONOBJ_EQ(obj[Document::metaFieldSearchHighlights].embeddedObject(), + BSON_ARRAY("abc"_sd + << "def"_sd)); Document fromBson = Document::fromBsonWithMetaData(obj); ASSERT_TRUE(fromBson.hasTextScore()); ASSERT_TRUE(fromBson.hasRandMetaField()); @@ -577,6 +651,28 @@ TEST(MetaFields, ToAndFromBson) { ASSERT_EQ(20, fromBson.getRandMetaField()); } +TEST(MetaFields, MetaFieldsIncludedInDocumentApproximateSize) { + MutableDocument docBuilder; + docBuilder.setSearchHighlights(DOC_ARRAY("abc"_sd + << "def"_sd)); + const size_t smallMetadataDocSize = docBuilder.freeze().getApproximateSize(); + + // The second document has a larger "search highlights" object. + MutableDocument docBuilder2; + docBuilder2.setSearchHighlights(DOC_ARRAY("abc"_sd + << "def"_sd + << "ghijklmnop"_sd)); + Document doc2 = docBuilder2.freeze(); + const size_t bigMetadataDocSize = doc2.getApproximateSize(); + ASSERT_GT(bigMetadataDocSize, smallMetadataDocSize); + + // Do a sanity check on the amount of space taken by metadata in document 2. + ASSERT_LT(doc2.getMetadataApproximateSize(), 200U); + + Document emptyDoc; + ASSERT_LT(emptyDoc.getMetadataApproximateSize(), 100U); +} + TEST(MetaFields, BadSerialization) { // Write an unrecognized option to the buffer. 
BufBuilder bb; diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index 87a2d841e5e..0e8984a81fa 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -2519,6 +2519,8 @@ intrusive_ptr<Expression> ExpressionMeta::parse( return new ExpressionMeta(expCtx, MetaType::RAND_VAL); } else if (expr.valueStringData() == "searchScore") { return new ExpressionMeta(expCtx, MetaType::SEARCH_SCORE); + } else if (expr.valueStringData() == "searchHighlights") { + return new ExpressionMeta(expCtx, MetaType::SEARCH_HIGHLIGHTS); } else { uasserted(17308, "Unsupported argument to $meta: " + expr.String()); } @@ -2539,6 +2541,9 @@ Value ExpressionMeta::serialize(bool explain) const { case MetaType::SEARCH_SCORE: return Value(DOC("$meta" << "searchScore"_sd)); + case MetaType::SEARCH_HIGHLIGHTS: + return Value(DOC("$meta" + << "searchHighlights"_sd)); } MONGO_UNREACHABLE; } @@ -2551,6 +2556,8 @@ Value ExpressionMeta::evaluate(const Document& root) const { return root.hasRandMetaField() ? Value(root.getRandMetaField()) : Value(); case MetaType::SEARCH_SCORE: return root.hasSearchScore() ? Value(root.getSearchScore()) : Value(); + case MetaType::SEARCH_HIGHLIGHTS: + return root.hasSearchHighlights() ? Value(root.getSearchHighlights()) : Value(); } MONGO_UNREACHABLE; } @@ -2558,6 +2565,9 @@ Value ExpressionMeta::evaluate(const Document& root) const { void ExpressionMeta::_doAddDependencies(DepsTracker* deps) const { if (_metaType == MetaType::TEXT_SCORE) { deps->setNeedsMetadata(DepsTracker::MetadataType::TEXT_SCORE, true); + + // We do not add the dependencies for SEARCH_SCORE or SEARCH_HIGHLIGHTS because those values + // are not stored in the collection (or in mongod at all). 
} } diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index c042fa32391..7033ee717d1 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -1635,6 +1635,7 @@ private: TEXT_SCORE, RAND_VAL, SEARCH_SCORE, + SEARCH_HIGHLIGHTS, }; ExpressionMeta(const boost::intrusive_ptr<ExpressionContext>& expCtx, MetaType metaType); diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp index 58e1000ed08..30a5eb0dcd0 100644 --- a/src/mongo/db/pipeline/expression_test.cpp +++ b/src/mongo/db/pipeline/expression_test.cpp @@ -6005,6 +6005,34 @@ TEST(GetComputedPathsTest, ExpressionMapNotConsideredRenameWithDottedInputPath) } // namespace GetComputedPathsTest +namespace expression_meta_test { +TEST(ExpressionMetaTest, ExpressionMetaSearchScore) { + intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + VariablesParseState vps = expCtx->variablesParseState; + BSONObj expr = fromjson("{$meta: \"searchScore\"}"); + auto expressionMeta = ExpressionMeta::parse(expCtx, expr.firstElement(), vps); + + MutableDocument doc; + doc.setSearchScore(1.234); + Value val = expressionMeta->evaluate(doc.freeze()); + ASSERT_EQ(val.getDouble(), 1.234); +} + +TEST(ExpressionMetaTest, ExpressionMetaSearchHighlights) { + intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + VariablesParseState vps = expCtx->variablesParseState; + BSONObj expr = fromjson("{$meta: \"searchHighlights\"}"); + auto expressionMeta = ExpressionMeta::parse(expCtx, expr.firstElement(), vps); + + MutableDocument doc; + Document highlights = DOC("this part" << 1 << "is opaque to the server" << 1); + doc.setSearchHighlights(Value(highlights)); + + Value val = expressionMeta->evaluate(doc.freeze()); + ASSERT_DOCUMENT_EQ(val.getDocument(), highlights); +} +} // namespace expression_meta_test + namespace ExpressionRegexTest { class ExpressionRegexTest { |